1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H 11 #define _LIBCPP___FORMAT_WRITE_ESCAPED_H 12 13 #include <__algorithm/ranges_copy.h> 14 #include <__algorithm/ranges_for_each.h> 15 #include <__charconv/to_chars_integral.h> 16 #include <__charconv/to_chars_result.h> 17 #include <__chrono/statically_widen.h> 18 #include <__format/escaped_output_table.h> 19 #include <__format/formatter_output.h> 20 #include <__format/parser_std_format_spec.h> 21 #include <__format/unicode.h> 22 #include <__iterator/back_insert_iterator.h> 23 #include <__memory/addressof.h> 24 #include <__system_error/errc.h> 25 #include <__type_traits/make_unsigned.h> 26 #include <__utility/move.h> 27 #include <string_view> 28 29 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 30 # pragma GCC system_header 31 #endif 32 33 _LIBCPP_PUSH_MACROS 34 #include <__undef_macros> 35 36 _LIBCPP_BEGIN_NAMESPACE_STD 37 38 namespace __formatter { 39 40 #if _LIBCPP_STD_VER >= 20 41 42 /// Writes a string using format's width estimation algorithm. 43 /// 44 /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the 45 /// input is ASCII. 46 template <class _CharT> 47 _LIBCPP_HIDE_FROM_ABI auto 48 __write_string(basic_string_view<_CharT> __str, 49 output_iterator<const _CharT&> auto __out_it, 50 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 51 if (!__specs.__has_precision()) 52 return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs); 53 54 int __size = __formatter::__truncate(__str, __specs.__precision_); 55 56 return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size); 57 } 58 59 #endif // _LIBCPP_STD_VER >= 20 60 #if _LIBCPP_STD_VER >= 23 61 62 struct __nul_terminator {}; 63 64 template <class _CharT> 65 _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { 66 return *__cstr == _CharT('\0'); 67 } 68 69 template <class _CharT> 70 _LIBCPP_HIDE_FROM_ABI void 71 __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { 72 back_insert_iterator __out_it{__str}; 73 std::ranges::copy(__prefix, __nul_terminator{}, __out_it); 74 75 char __buffer[8]; 76 to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); 77 _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); 78 std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); 79 80 __str += _CharT('}'); 81 } 82 83 // [format.string.escaped]/2.2.1.2 84 // ... 85 // then the sequence \u{hex-digit-sequence} is appended to E, where 86 // hex-digit-sequence is the shortest hexadecimal representation of C using 87 // lower-case hexadecimal digits. 88 template <class _CharT> 89 _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { 90 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); 91 } 92 93 // [format.string.escaped]/2.2.3 94 // Otherwise (X is a sequence of ill-formed code units), each code unit U is 95 // appended to E in order as the sequence \x{hex-digit-sequence}, where 96 // hex-digit-sequence is the shortest hexadecimal representation of U using 97 // lower-case hexadecimal digits. 98 template <class _CharT> 99 _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { 100 __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); 101 } 102 103 template <class _CharT> 104 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool 105 __is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) { 106 # ifdef _LIBCPP_HAS_NO_UNICODE 107 // For ASCII assume everything above 127 is printable. 108 if (__value > 127) 109 return false; 110 # endif 111 112 // [format.string.escaped]/2.2.1.2.1 113 // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar 114 // value whose Unicode property General_Category has a value in the groups 115 // Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard, 116 if (!__escaped_output_table::__needs_escape(__value)) 117 // [format.string.escaped]/2.2.1.2.2 118 // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar 119 // value with the Unicode property Grapheme_Extend=Yes as described by UAX 120 // #44 of the Unicode Standard and C is not immediately preceded in S by a 121 // character P appended to E without translation to an escape sequence, 122 if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) != 123 __extended_grapheme_custer_property_boundary::__property::__Extend) 124 return false; 125 126 __formatter::__write_well_formed_escaped_code_unit(__str, __value); 127 return true; 128 } 129 130 template <class _CharT> 131 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { 132 return static_cast<make_unsigned_t<_CharT>>(__value); 133 } 134 135 enum class __escape_quotation_mark { __apostrophe, __double_quote }; 136 137 // [format.string.escaped]/2 138 template <class _CharT> 139 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written( 140 basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) { 141 // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] 142 switch (__value) { 143 case _CharT('\t'): 144 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); 145 return true; 146 case _CharT('\n'): 147 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); 148 return true; 149 case _CharT('\r'): 150 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); 151 return true; 152 case _CharT('\''): 153 if (__mark == __escape_quotation_mark::__apostrophe) 154 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); 155 else 156 __str += __value; 157 return true; 158 case _CharT('"'): 159 if (__mark == __escape_quotation_mark::__double_quote) 160 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); 161 else 162 __str += __value; 163 return true; 164 case _CharT('\\'): 165 __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); 166 return true; 167 168 // 2.2.1.2 - Space 169 case _CharT(' '): 170 __str += __value; 171 return true; 172 } 173 174 // 2.2.2 175 // Otherwise, if X is a shift sequence, the effect on E and further 176 // decoding of S is unspecified. 177 // For now shift sequences are ignored and treated as Unicode. Other parts 178 // of the format library do the same. It's unknown how ostream treats them. 179 // TODO FMT determine what to do with shift sequences. 180 181 // 2.2.1.2.1 and 2.2.1.2.2 - Escape 182 return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value)); 183 } 184 185 template <class _CharT> 186 _LIBCPP_HIDE_FROM_ABI void 187 __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { 188 __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; 189 190 // When the first code unit has the property Grapheme_Extend=Yes it needs to 191 // be escaped. This happens when the previous code unit was also escaped. 192 bool __escape = true; 193 while (!__view.__at_end()) { 194 auto __first = __view.__position(); 195 typename __unicode::__consume_result __result = __view.__consume(); 196 if (__result.__status == __unicode::__consume_result::__ok) { 197 __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark); 198 if (!__escape) 199 // 2.2.1.3 - Add the character 200 ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); 201 } else { 202 // 2.2.3 sequence of ill-formed code units 203 ranges::for_each(__first, __view.__position(), [&](_CharT __value) { 204 __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); 205 }); 206 } 207 } 208 } 209 210 template <class _CharT> 211 _LIBCPP_HIDE_FROM_ABI auto 212 __format_escaped_char(_CharT __value, 213 output_iterator<const _CharT&> auto __out_it, 214 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 215 basic_string<_CharT> __str; 216 __str += _CharT('\''); 217 __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); 218 __str += _CharT('\''); 219 return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size()); 220 } 221 222 template <class _CharT> 223 _LIBCPP_HIDE_FROM_ABI auto 224 __format_escaped_string(basic_string_view<_CharT> __values, 225 output_iterator<const _CharT&> auto __out_it, 226 __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 227 basic_string<_CharT> __str; 228 __str += _CharT('"'); 229 __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); 230 __str += _CharT('"'); 231 return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs); 232 } 233 234 #endif // _LIBCPP_STD_VER >= 23 235 236 } // namespace __formatter 237 238 _LIBCPP_END_NAMESPACE_STD 239 240 _LIBCPP_POP_MACROS 241 242 #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H 243