1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H 11 #define _LIBCPP___FORMAT_WRITE_ESCAPED_H 12 13 #include <__algorithm/ranges_copy.h> 14 #include <__algorithm/ranges_for_each.h> 15 #include <__charconv/to_chars_integral.h> 16 #include <__charconv/to_chars_result.h> 17 #include <__chrono/statically_widen.h> 18 #include <__format/escaped_output_table.h> 19 #include <__format/formatter_output.h> 20 #include <__format/parser_std_format_spec.h> 21 #include <__format/unicode.h> 22 #include <__iterator/back_insert_iterator.h> 23 #include <__memory/addressof.h> 24 #include <__system_error/errc.h> 25 #include <__type_traits/make_unsigned.h> 26 #include <__utility/move.h> 27 #include <string_view> 28 29 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 30 # pragma GCC system_header 31 #endif 32 33 _LIBCPP_PUSH_MACROS 34 #include <__undef_macros> 35 36 _LIBCPP_BEGIN_NAMESPACE_STD 37 38 namespace __formatter { 39 40 #if _LIBCPP_STD_VER >= 20 41 42 /// Writes a string using format's width estimation algorithm. 43 /// 44 /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the 45 /// input is ASCII. 46 template <class _CharT> 47 _LIBCPP_HIDE_FROM_ABI auto 48 __write_string(basic_string_view<_CharT> __str, 49 output_iterator<const _CharT&> auto __out_it, 50 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 51 if (!__specs.__has_precision()) 52 return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs); 53 54 int __size = __formatter::__truncate(__str, __specs.__precision_); 55 56 return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size); 57 } 58 59 #endif // _LIBCPP_STD_VER >= 20 60 #if _LIBCPP_STD_VER >= 23 61 62 struct __nul_terminator {}; 63 64 template <class _CharT> 65 _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { 66 return *__cstr == _CharT('\0'); 67 } 68 69 template <class _CharT> 70 _LIBCPP_HIDE_FROM_ABI void 71 __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { 72 back_insert_iterator __out_it{__str}; 73 std::ranges::copy(__prefix, __nul_terminator{}, __out_it); 74 75 char __buffer[8]; 76 to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); 77 _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); 78 std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); 79 80 __str += _CharT('}'); 81 } 82 83 // [format.string.escaped]/2.2.1.2 84 // ... 85 // then the sequence \u{hex-digit-sequence} is appended to E, where 86 // hex-digit-sequence is the shortest hexadecimal representation of C using 87 // lower-case hexadecimal digits. 88 template <class _CharT> 89 _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { 90 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); 91 } 92 93 // [format.string.escaped]/2.2.3 94 // Otherwise (X is a sequence of ill-formed code units), each code unit U is 95 // appended to E in order as the sequence \x{hex-digit-sequence}, where 96 // hex-digit-sequence is the shortest hexadecimal representation of U using 97 // lower-case hexadecimal digits. 98 template <class _CharT> 99 _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { 100 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); 101 } 102 103 template <class _CharT> 104 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { 105 # ifdef _LIBCPP_HAS_NO_UNICODE 106 // For ASCII assume everything above 127 is printable. 107 if (__value > 127) 108 return false; 109 # endif 110 111 if (!__escaped_output_table::__needs_escape(__value)) 112 return false; 113 114 __formatter::__write_well_formed_escaped_code_unit(__str, __value); 115 return true; 116 } 117 118 template <class _CharT> 119 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { 120 return static_cast<make_unsigned_t<_CharT>>(__value); 121 } 122 123 enum class __escape_quotation_mark { __apostrophe, __double_quote }; 124 125 // [format.string.escaped]/2 126 template <class _CharT> 127 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool 128 __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { 129 // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] 130 switch (__value) { 131 case _CharT('\t'): 132 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); 133 return true; 134 case _CharT('\n'): 135 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); 136 return true; 137 case _CharT('\r'): 138 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); 139 return true; 140 case _CharT('\''): 141 if (__mark == __escape_quotation_mark::__apostrophe) 142 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); 143 else 144 __str += __value; 145 return true; 146 case _CharT('"'): 147 if (__mark == __escape_quotation_mark::__double_quote) 148 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); 149 else 150 __str += __value; 151 return true; 152 case _CharT('\\'): 153 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); 154 return true; 155 156 // 2.2.1.2 - Space 157 case _CharT(' '): 158 __str += __value; 159 return true; 160 } 161 162 // 2.2.2 163 // Otherwise, if X is a shift sequence, the effect on E and further 164 // decoding of S is unspecified. 165 // For now shift sequences are ignored and treated as Unicode. Other parts 166 // of the format library do the same. It's unknown how ostream treats them. 167 // TODO FMT determine what to do with shift sequences. 168 169 // 2.2.1.2.1 and 2.2.1.2.2 - Escape 170 return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); 171 } 172 173 template <class _CharT> 174 _LIBCPP_HIDE_FROM_ABI void 175 __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { 176 __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; 177 178 while (!__view.__at_end()) { 179 auto __first = __view.__position(); 180 typename __unicode::__consume_result __result = __view.__consume(); 181 if (__result.__status == __unicode::__consume_result::__ok) { 182 if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) 183 // 2.2.1.3 - Add the character 184 ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); 185 } else { 186 // 2.2.3 sequence of ill-formed code units 187 ranges::for_each(__first, __view.__position(), [&](_CharT __value) { 188 __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); 189 }); 190 } 191 } 192 } 193 194 template <class _CharT> 195 _LIBCPP_HIDE_FROM_ABI auto 196 __format_escaped_char(_CharT __value, 197 output_iterator<const _CharT&> auto __out_it, 198 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 199 basic_string<_CharT> __str; 200 __str += _CharT('\''); 201 __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); 202 __str += _CharT('\''); 203 return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size()); 204 } 205 206 template <class _CharT> 207 _LIBCPP_HIDE_FROM_ABI auto 208 __format_escaped_string(basic_string_view<_CharT> __values, 209 output_iterator<const _CharT&> auto __out_it, 210 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 211 basic_string<_CharT> __str; 212 __str += _CharT('"'); 213 __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); 214 __str += _CharT('"'); 215 return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs); 216 } 217 218 #endif // _LIBCPP_STD_VER >= 23 219 220 } // namespace __formatter 221 222 _LIBCPP_END_NAMESPACE_STD 223 224 _LIBCPP_POP_MACROS 225 226 #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H 227