xref: /freebsd/contrib/llvm-project/libcxx/include/__format/write_escaped.h (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
11 #define _LIBCPP___FORMAT_WRITE_ESCAPED_H
12 
13 #include <__algorithm/ranges_copy.h>
14 #include <__algorithm/ranges_for_each.h>
15 #include <__charconv/to_chars_integral.h>
16 #include <__charconv/to_chars_result.h>
17 #include <__chrono/statically_widen.h>
18 #include <__format/escaped_output_table.h>
19 #include <__format/formatter_output.h>
20 #include <__format/parser_std_format_spec.h>
21 #include <__format/unicode.h>
22 #include <__iterator/back_insert_iterator.h>
23 #include <__memory/addressof.h>
24 #include <__system_error/errc.h>
25 #include <__type_traits/make_unsigned.h>
26 #include <__utility/move.h>
27 #include <string_view>
28 
29 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
30 #  pragma GCC system_header
31 #endif
32 
33 _LIBCPP_PUSH_MACROS
34 #include <__undef_macros>
35 
36 _LIBCPP_BEGIN_NAMESPACE_STD
37 
38 namespace __formatter {
39 
40 #if _LIBCPP_STD_VER >= 20
41 
42 /// Writes a string using format's width estimation algorithm.
43 ///
44 /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
45 /// input is ASCII.
46 template <class _CharT>
47 _LIBCPP_HIDE_FROM_ABI auto
48 __write_string(basic_string_view<_CharT> __str,
49                output_iterator<const _CharT&> auto __out_it,
50                __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
51   if (!__specs.__has_precision())
52     return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);
53 
54   int __size = __formatter::__truncate(__str, __specs.__precision_);
55 
56   return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);
57 }
58 
59 #endif // _LIBCPP_STD_VER >= 20
60 #if _LIBCPP_STD_VER >= 23
61 
62 struct __nul_terminator {};
63 
64 template <class _CharT>
65 _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
66   return *__cstr == _CharT('\0');
67 }
68 
69 template <class _CharT>
70 _LIBCPP_HIDE_FROM_ABI void
71 __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
72   back_insert_iterator __out_it{__str};
73   std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
74 
75   char __buffer[8];
76   to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
77   _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
78   std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
79 
80   __str += _CharT('}');
81 }
82 
83 // [format.string.escaped]/2.2.1.2
84 // ...
85 // then the sequence \u{hex-digit-sequence} is appended to E, where
86 // hex-digit-sequence is the shortest hexadecimal representation of C using
87 // lower-case hexadecimal digits.
88 template <class _CharT>
89 _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
90   __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
91 }
92 
93 // [format.string.escaped]/2.2.3
94 // Otherwise (X is a sequence of ill-formed code units), each code unit U is
95 // appended to E in order as the sequence \x{hex-digit-sequence}, where
96 // hex-digit-sequence is the shortest hexadecimal representation of U using
97 // lower-case hexadecimal digits.
98 template <class _CharT>
99 _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
100   __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
101 }
102 
103 template <class _CharT>
104 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
105 __is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) {
106 #  ifdef _LIBCPP_HAS_NO_UNICODE
107   // For ASCII assume everything above 127 is printable.
108   if (__value > 127)
109     return false;
110 #  endif
111 
112   // [format.string.escaped]/2.2.1.2.1
113   //   CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
114   //   value whose Unicode property General_Category has a value in the groups
115   //   Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard,
116   if (!__escaped_output_table::__needs_escape(__value))
117     // [format.string.escaped]/2.2.1.2.2
118     //   CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
119     //   value with the Unicode property Grapheme_Extend=Yes as described by UAX
120     //   #44 of the Unicode Standard and C is not immediately preceded in S by a
121     //   character P appended to E without translation to an escape sequence,
122     if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) !=
123                                __extended_grapheme_custer_property_boundary::__property::__Extend)
124       return false;
125 
126   __formatter::__write_well_formed_escaped_code_unit(__str, __value);
127   return true;
128 }
129 
130 template <class _CharT>
131 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
132   return static_cast<make_unsigned_t<_CharT>>(__value);
133 }
134 
// Selects which quotation mark is written as an escape sequence; the other
// one is copied to the output unchanged.
enum class __escape_quotation_mark {
  __apostrophe,  // escape ' (used when formatting a single character)
  __double_quote // escape " (used when formatting a string)
};
136 
137 // [format.string.escaped]/2
138 template <class _CharT>
139 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(
140     basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) {
141   // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
142   switch (__value) {
143   case _CharT('\t'):
144     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
145     return true;
146   case _CharT('\n'):
147     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
148     return true;
149   case _CharT('\r'):
150     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
151     return true;
152   case _CharT('\''):
153     if (__mark == __escape_quotation_mark::__apostrophe)
154       __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
155     else
156       __str += __value;
157     return true;
158   case _CharT('"'):
159     if (__mark == __escape_quotation_mark::__double_quote)
160       __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
161     else
162       __str += __value;
163     return true;
164   case _CharT('\\'):
165     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
166     return true;
167 
168   // 2.2.1.2 - Space
169   case _CharT(' '):
170     __str += __value;
171     return true;
172   }
173 
174   // 2.2.2
175   //   Otherwise, if X is a shift sequence, the effect on E and further
176   //   decoding of S is unspecified.
177   // For now shift sequences are ignored and treated as Unicode. Other parts
178   // of the format library do the same. It's unknown how ostream treats them.
179   // TODO FMT determine what to do with shift sequences.
180 
181   // 2.2.1.2.1 and 2.2.1.2.2 - Escape
182   return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value));
183 }
184 
185 template <class _CharT>
186 _LIBCPP_HIDE_FROM_ABI void
187 __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
188   __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
189 
190   // When the first code unit has the property Grapheme_Extend=Yes it needs to
191   // be escaped. This happens when the previous code unit was also escaped.
192   bool __escape = true;
193   while (!__view.__at_end()) {
194     auto __first                                  = __view.__position();
195     typename __unicode::__consume_result __result = __view.__consume();
196     if (__result.__status == __unicode::__consume_result::__ok) {
197       __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark);
198       if (!__escape)
199         // 2.2.1.3 - Add the character
200         ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
201     } else {
202       // 2.2.3 sequence of ill-formed code units
203       ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
204         __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
205       });
206     }
207   }
208 }
209 
210 template <class _CharT>
211 _LIBCPP_HIDE_FROM_ABI auto
212 __format_escaped_char(_CharT __value,
213                       output_iterator<const _CharT&> auto __out_it,
214                       __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
215   basic_string<_CharT> __str;
216   __str += _CharT('\'');
217   __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
218   __str += _CharT('\'');
219   return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());
220 }
221 
222 template <class _CharT>
223 _LIBCPP_HIDE_FROM_ABI auto
224 __format_escaped_string(basic_string_view<_CharT> __values,
225                         output_iterator<const _CharT&> auto __out_it,
226                         __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
227   basic_string<_CharT> __str;
228   __str += _CharT('"');
229   __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
230   __str += _CharT('"');
231   return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);
232 }
233 
234 #endif // _LIBCPP_STD_VER >= 23
235 
236 } // namespace __formatter
237 
238 _LIBCPP_END_NAMESPACE_STD
239 
240 _LIBCPP_POP_MACROS
241 
242 #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H
243