xref: /freebsd/contrib/llvm-project/libcxx/include/__format/write_escaped.h (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
11 #define _LIBCPP___FORMAT_WRITE_ESCAPED_H
12 
13 #include <__algorithm/ranges_copy.h>
14 #include <__algorithm/ranges_for_each.h>
15 #include <__charconv/to_chars_integral.h>
16 #include <__charconv/to_chars_result.h>
17 #include <__chrono/statically_widen.h>
18 #include <__format/escaped_output_table.h>
19 #include <__format/formatter_output.h>
20 #include <__format/parser_std_format_spec.h>
21 #include <__format/unicode.h>
22 #include <__iterator/back_insert_iterator.h>
23 #include <__memory/addressof.h>
24 #include <__system_error/errc.h>
25 #include <__type_traits/make_unsigned.h>
26 #include <__utility/move.h>
27 #include <string_view>
28 
29 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
30 #  pragma GCC system_header
31 #endif
32 
33 _LIBCPP_BEGIN_NAMESPACE_STD
34 
35 namespace __formatter {
36 
37 #if _LIBCPP_STD_VER >= 20
38 
39 /// Writes a string using format's width estimation algorithm.
40 ///
41 /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
42 /// input is ASCII.
43 template <class _CharT>
44 _LIBCPP_HIDE_FROM_ABI auto
45 __write_string(basic_string_view<_CharT> __str,
46                output_iterator<const _CharT&> auto __out_it,
47                __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
48   if (!__specs.__has_precision())
49     return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);
50 
51   int __size = __formatter::__truncate(__str, __specs.__precision_);
52 
53   return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);
54 }
55 
56 #endif // _LIBCPP_STD_VER >= 20
57 #if _LIBCPP_STD_VER >= 23
58 
59 struct __nul_terminator {};
60 
61 template <class _CharT>
62 _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
63   return *__cstr == _CharT('\0');
64 }
65 
66 template <class _CharT>
67 _LIBCPP_HIDE_FROM_ABI void
68 __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
69   back_insert_iterator __out_it{__str};
70   std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
71 
72   char __buffer[8];
73   to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
74   _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
75   std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
76 
77   __str += _CharT('}');
78 }
79 
80 // [format.string.escaped]/2.2.1.2
81 // ...
82 // then the sequence \u{hex-digit-sequence} is appended to E, where
83 // hex-digit-sequence is the shortest hexadecimal representation of C using
84 // lower-case hexadecimal digits.
85 template <class _CharT>
86 _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
87   __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
88 }
89 
90 // [format.string.escaped]/2.2.3
91 // Otherwise (X is a sequence of ill-formed code units), each code unit U is
92 // appended to E in order as the sequence \x{hex-digit-sequence}, where
93 // hex-digit-sequence is the shortest hexadecimal representation of U using
94 // lower-case hexadecimal digits.
95 template <class _CharT>
96 _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
97   __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
98 }
99 
100 template <class _CharT>
101 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) {
102 #  ifdef _LIBCPP_HAS_NO_UNICODE
103   // For ASCII assume everything above 127 is printable.
104   if (__value > 127)
105     return false;
106 #  endif
107 
108   if (!__escaped_output_table::__needs_escape(__value))
109     return false;
110 
111   __formatter::__write_well_formed_escaped_code_unit(__str, __value);
112   return true;
113 }
114 
115 template <class _CharT>
116 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
117   return static_cast<make_unsigned_t<_CharT>>(__value);
118 }
119 
// Selects which quotation mark gets a backslash escape in escaped output;
// the other quotation mark is written unchanged.
enum class __escape_quotation_mark {
  __apostrophe,  // escape ' (passed by __format_escaped_char)
  __double_quote // escape " (passed by __format_escaped_string)
};
121 
122 // [format.string.escaped]/2
123 template <class _CharT>
124 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
125 __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) {
126   // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
127   switch (__value) {
128   case _CharT('\t'):
129     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
130     return true;
131   case _CharT('\n'):
132     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
133     return true;
134   case _CharT('\r'):
135     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
136     return true;
137   case _CharT('\''):
138     if (__mark == __escape_quotation_mark::__apostrophe)
139       __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
140     else
141       __str += __value;
142     return true;
143   case _CharT('"'):
144     if (__mark == __escape_quotation_mark::__double_quote)
145       __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
146     else
147       __str += __value;
148     return true;
149   case _CharT('\\'):
150     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
151     return true;
152 
153   // 2.2.1.2 - Space
154   case _CharT(' '):
155     __str += __value;
156     return true;
157   }
158 
159   // 2.2.2
160   //   Otherwise, if X is a shift sequence, the effect on E and further
161   //   decoding of S is unspecified.
162   // For now shift sequences are ignored and treated as Unicode. Other parts
163   // of the format library do the same. It's unknown how ostream treats them.
164   // TODO FMT determine what to do with shift sequences.
165 
166   // 2.2.1.2.1 and 2.2.1.2.2 - Escape
167   return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value));
168 }
169 
170 template <class _CharT>
171 _LIBCPP_HIDE_FROM_ABI void
172 __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
173   __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
174 
175   while (!__view.__at_end()) {
176     auto __first                                  = __view.__position();
177     typename __unicode::__consume_result __result = __view.__consume();
178     if (__result.__status == __unicode::__consume_result::__ok) {
179       if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
180         // 2.2.1.3 - Add the character
181         ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
182     } else {
183       // 2.2.3 sequence of ill-formed code units
184       ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
185         __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
186       });
187     }
188   }
189 }
190 
191 template <class _CharT>
192 _LIBCPP_HIDE_FROM_ABI auto
193 __format_escaped_char(_CharT __value,
194                       output_iterator<const _CharT&> auto __out_it,
195                       __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
196   basic_string<_CharT> __str;
197   __str += _CharT('\'');
198   __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
199   __str += _CharT('\'');
200   return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());
201 }
202 
203 template <class _CharT>
204 _LIBCPP_HIDE_FROM_ABI auto
205 __format_escaped_string(basic_string_view<_CharT> __values,
206                         output_iterator<const _CharT&> auto __out_it,
207                         __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
208   basic_string<_CharT> __str;
209   __str += _CharT('"');
210   __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
211   __str += _CharT('"');
212   return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);
213 }
214 
215 #endif // _LIBCPP_STD_VER >= 23
216 
217 } // namespace __formatter
218 
219 _LIBCPP_END_NAMESPACE_STD
220 
221 #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H
222