1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H 11 #define _LIBCPP___FORMAT_WRITE_ESCAPED_H 12 13 #include <__algorithm/ranges_copy.h> 14 #include <__algorithm/ranges_for_each.h> 15 #include <__charconv/to_chars_integral.h> 16 #include <__charconv/to_chars_result.h> 17 #include <__chrono/statically_widen.h> 18 #include <__format/escaped_output_table.h> 19 #include <__format/formatter_output.h> 20 #include <__format/parser_std_format_spec.h> 21 #include <__format/unicode.h> 22 #include <__iterator/back_insert_iterator.h> 23 #include <__memory/addressof.h> 24 #include <__system_error/errc.h> 25 #include <__type_traits/make_unsigned.h> 26 #include <__utility/move.h> 27 #include <string_view> 28 29 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 30 # pragma GCC system_header 31 #endif 32 33 _LIBCPP_BEGIN_NAMESPACE_STD 34 35 36 namespace __formatter { 37 38 #if _LIBCPP_STD_VER >= 20 39 40 /// Writes a string using format's width estimation algorithm. 41 /// 42 /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the 43 /// input is ASCII. 44 template <class _CharT> 45 _LIBCPP_HIDE_FROM_ABI auto __write_string( 46 basic_string_view<_CharT> __str, 47 output_iterator<const _CharT&> auto __out_it, 48 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 49 if (!__specs.__has_precision()) 50 return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs); 51 52 int __size = __formatter::__truncate(__str, __specs.__precision_); 53 54 return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); 55 } 56 57 # endif // _LIBCPP_STD_VER >= 20 58 # if _LIBCPP_STD_VER >= 23 59 60 struct __nul_terminator {}; 61 62 template <class _CharT> 63 _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { 64 return *__cstr == _CharT('\0'); 65 } 66 67 template <class _CharT> 68 _LIBCPP_HIDE_FROM_ABI void 69 __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { 70 back_insert_iterator __out_it{__str}; 71 std::ranges::copy(__prefix, __nul_terminator{}, __out_it); 72 73 char __buffer[8]; 74 to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); 75 _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); 76 std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); 77 78 __str += _CharT('}'); 79 } 80 81 // [format.string.escaped]/2.2.1.2 82 // ... 83 // then the sequence \u{hex-digit-sequence} is appended to E, where 84 // hex-digit-sequence is the shortest hexadecimal representation of C using 85 // lower-case hexadecimal digits. 86 template <class _CharT> 87 _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { 88 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); 89 } 90 91 // [format.string.escaped]/2.2.3 92 // Otherwise (X is a sequence of ill-formed code units), each code unit U is 93 // appended to E in order as the sequence \x{hex-digit-sequence}, where 94 // hex-digit-sequence is the shortest hexadecimal representation of U using 95 // lower-case hexadecimal digits. 96 template <class _CharT> 97 _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { 98 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); 99 } 100 101 template <class _CharT> 102 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { 103 # ifdef _LIBCPP_HAS_NO_UNICODE 104 // For ASCII assume everything above 127 is printable. 105 if (__value > 127) 106 return false; 107 # endif 108 109 if (!__escaped_output_table::__needs_escape(__value)) 110 return false; 111 112 __formatter::__write_well_formed_escaped_code_unit(__str, __value); 113 return true; 114 } 115 116 template <class _CharT> 117 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { 118 return static_cast<make_unsigned_t<_CharT>>(__value); 119 } 120 121 enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote }; 122 123 // [format.string.escaped]/2 124 template <class _CharT> 125 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool 126 __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { 127 // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] 128 switch (__value) { 129 case _CharT('\t'): 130 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); 131 return true; 132 case _CharT('\n'): 133 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); 134 return true; 135 case _CharT('\r'): 136 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); 137 return true; 138 case _CharT('\''): 139 if (__mark == __escape_quotation_mark::__apostrophe) 140 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); 141 else 142 __str += __value; 143 return true; 144 case _CharT('"'): 145 if (__mark == __escape_quotation_mark::__double_quote) 146 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); 147 else 148 __str += __value; 149 return true; 150 case _CharT('\\'): 151 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); 152 return true; 153 154 // 2.2.1.2 - Space 155 case _CharT(' '): 156 __str += __value; 157 return true; 158 } 159 160 // 2.2.2 161 // Otherwise, if X is a shift sequence, the effect on E and further 162 // decoding of S is unspecified. 163 // For now shift sequences are ignored and treated as Unicode. Other parts 164 // of the format library do the same. It's unknown how ostream treats them. 165 // TODO FMT determine what to do with shift sequences. 166 167 // 2.2.1.2.1 and 2.2.1.2.2 - Escape 168 return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); 169 } 170 171 template <class _CharT> 172 _LIBCPP_HIDE_FROM_ABI void 173 __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { 174 __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; 175 176 while (!__view.__at_end()) { 177 auto __first = __view.__position(); 178 typename __unicode::__consume_result __result = __view.__consume(); 179 if (__result.__status == __unicode::__consume_result::__ok) { 180 if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) 181 // 2.2.1.3 - Add the character 182 ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); 183 } else { 184 // 2.2.3 sequence of ill-formed code units 185 ranges::for_each(__first, __view.__position(), [&](_CharT __value) { 186 __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); 187 }); 188 } 189 } 190 } 191 192 template <class _CharT> 193 _LIBCPP_HIDE_FROM_ABI auto 194 __format_escaped_char(_CharT __value, 195 output_iterator<const _CharT&> auto __out_it, 196 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 197 basic_string<_CharT> __str; 198 __str += _CharT('\''); 199 __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); 200 __str += _CharT('\''); 201 return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size()); 202 } 203 204 template <class _CharT> 205 _LIBCPP_HIDE_FROM_ABI auto 206 __format_escaped_string(basic_string_view<_CharT> __values, 207 output_iterator<const _CharT&> auto __out_it, 208 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 209 basic_string<_CharT> __str; 210 __str += _CharT('"'); 211 __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); 212 __str += _CharT('"'); 213 return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs); 214 } 215 216 # endif // _LIBCPP_STD_VER >= 23 217 218 } // namespace __formatter 219 220 _LIBCPP_END_NAMESPACE_STD 221 222 #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H 223