1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H 11 #define _LIBCPP___FORMAT_WRITE_ESCAPED_H 12 13 #include <__algorithm/ranges_copy.h> 14 #include <__algorithm/ranges_for_each.h> 15 #include <__charconv/to_chars_integral.h> 16 #include <__charconv/to_chars_result.h> 17 #include <__chrono/statically_widen.h> 18 #include <__format/escaped_output_table.h> 19 #include <__format/formatter_output.h> 20 #include <__format/parser_std_format_spec.h> 21 #include <__format/unicode.h> 22 #include <__iterator/back_insert_iterator.h> 23 #include <__memory/addressof.h> 24 #include <__system_error/errc.h> 25 #include <__type_traits/make_unsigned.h> 26 #include <__utility/move.h> 27 #include <string_view> 28 29 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 30 # pragma GCC system_header 31 #endif 32 33 _LIBCPP_BEGIN_NAMESPACE_STD 34 35 namespace __formatter { 36 37 #if _LIBCPP_STD_VER >= 20 38 39 /// Writes a string using format's width estimation algorithm. 40 /// 41 /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the 42 /// input is ASCII. 43 template <class _CharT> 44 _LIBCPP_HIDE_FROM_ABI auto 45 __write_string(basic_string_view<_CharT> __str, 46 output_iterator<const _CharT&> auto __out_it, 47 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 48 if (!__specs.__has_precision()) 49 return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs); 50 51 int __size = __formatter::__truncate(__str, __specs.__precision_); 52 53 return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size); 54 } 55 56 #endif // _LIBCPP_STD_VER >= 20 57 #if _LIBCPP_STD_VER >= 23 58 59 struct __nul_terminator {}; 60 61 template <class _CharT> 62 _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { 63 return *__cstr == _CharT('\0'); 64 } 65 66 template <class _CharT> 67 _LIBCPP_HIDE_FROM_ABI void 68 __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { 69 back_insert_iterator __out_it{__str}; 70 std::ranges::copy(__prefix, __nul_terminator{}, __out_it); 71 72 char __buffer[8]; 73 to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); 74 _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); 75 std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); 76 77 __str += _CharT('}'); 78 } 79 80 // [format.string.escaped]/2.2.1.2 81 // ... 82 // then the sequence \u{hex-digit-sequence} is appended to E, where 83 // hex-digit-sequence is the shortest hexadecimal representation of C using 84 // lower-case hexadecimal digits. 85 template <class _CharT> 86 _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { 87 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); 88 } 89 90 // [format.string.escaped]/2.2.3 91 // Otherwise (X is a sequence of ill-formed code units), each code unit U is 92 // appended to E in order as the sequence \x{hex-digit-sequence}, where 93 // hex-digit-sequence is the shortest hexadecimal representation of U using 94 // lower-case hexadecimal digits. 95 template <class _CharT> 96 _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { 97 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); 98 } 99 100 template <class _CharT> 101 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { 102 # ifdef _LIBCPP_HAS_NO_UNICODE 103 // For ASCII assume everything above 127 is printable. 104 if (__value > 127) 105 return false; 106 # endif 107 108 if (!__escaped_output_table::__needs_escape(__value)) 109 return false; 110 111 __formatter::__write_well_formed_escaped_code_unit(__str, __value); 112 return true; 113 } 114 115 template <class _CharT> 116 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { 117 return static_cast<make_unsigned_t<_CharT>>(__value); 118 } 119 120 enum class __escape_quotation_mark { __apostrophe, __double_quote }; 121 122 // [format.string.escaped]/2 123 template <class _CharT> 124 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool 125 __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { 126 // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] 127 switch (__value) { 128 case _CharT('\t'): 129 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); 130 return true; 131 case _CharT('\n'): 132 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); 133 return true; 134 case _CharT('\r'): 135 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); 136 return true; 137 case _CharT('\''): 138 if (__mark == __escape_quotation_mark::__apostrophe) 139 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); 140 else 141 __str += __value; 142 return true; 143 case _CharT('"'): 144 if (__mark == __escape_quotation_mark::__double_quote) 145 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); 146 else 147 __str += __value; 148 return true; 149 case _CharT('\\'): 150 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); 151 return true; 152 153 // 2.2.1.2 - Space 154 case _CharT(' '): 155 __str += __value; 156 return true; 157 } 158 159 // 2.2.2 160 // Otherwise, if X is a shift sequence, the effect on E and further 161 // decoding of S is unspecified. 162 // For now shift sequences are ignored and treated as Unicode. Other parts 163 // of the format library do the same. It's unknown how ostream treats them. 164 // TODO FMT determine what to do with shift sequences. 165 166 // 2.2.1.2.1 and 2.2.1.2.2 - Escape 167 return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); 168 } 169 170 template <class _CharT> 171 _LIBCPP_HIDE_FROM_ABI void 172 __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { 173 __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; 174 175 while (!__view.__at_end()) { 176 auto __first = __view.__position(); 177 typename __unicode::__consume_result __result = __view.__consume(); 178 if (__result.__status == __unicode::__consume_result::__ok) { 179 if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) 180 // 2.2.1.3 - Add the character 181 ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); 182 } else { 183 // 2.2.3 sequence of ill-formed code units 184 ranges::for_each(__first, __view.__position(), [&](_CharT __value) { 185 __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); 186 }); 187 } 188 } 189 } 190 191 template <class _CharT> 192 _LIBCPP_HIDE_FROM_ABI auto 193 __format_escaped_char(_CharT __value, 194 output_iterator<const _CharT&> auto __out_it, 195 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 196 basic_string<_CharT> __str; 197 __str += _CharT('\''); 198 __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); 199 __str += _CharT('\''); 200 return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size()); 201 } 202 203 template <class _CharT> 204 _LIBCPP_HIDE_FROM_ABI auto 205 __format_escaped_string(basic_string_view<_CharT> __values, 206 output_iterator<const _CharT&> auto __out_it, 207 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 208 basic_string<_CharT> __str; 209 __str += _CharT('"'); 210 __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); 211 __str += _CharT('"'); 212 return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs); 213 } 214 215 #endif // _LIBCPP_STD_VER >= 23 216 217 } // namespace __formatter 218 219 _LIBCPP_END_NAMESPACE_STD 220 221 #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H 222