// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
#define _LIBCPP___FORMAT_WRITE_ESCAPED_H

#include <__algorithm/ranges_copy.h>
#include <__algorithm/ranges_for_each.h>
#include <__charconv/to_chars_integral.h>
#include <__charconv/to_chars_result.h>
#include <__chrono/statically_widen.h>
#include <__format/escaped_output_table.h>
#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
#include <__format/unicode.h>
#include <__iterator/back_insert_iterator.h>
#include <__memory/addressof.h>
#include <__system_error/errc.h>
#include <__type_traits/make_unsigned.h>
#include <__utility/move.h>
#include <string>
#include <string_view>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#  pragma GCC system_header
#endif

_LIBCPP_BEGIN_NAMESPACE_STD

namespace __formatter {

#if _LIBCPP_STD_VER >= 20

/// Writes a string using format's width estimation algorithm.
///
/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
/// input is ASCII.
template _LIBCPP_HIDE_FROM_ABI auto __write_string( basic_string_view<_CharT> __str, output_iterator auto __out_it, __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { if (!__specs.__has_precision()) return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs); int __size = __formatter::__truncate(__str, __specs.__precision_); return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); } # endif // _LIBCPP_STD_VER >= 20 # if _LIBCPP_STD_VER >= 23 struct __nul_terminator {}; template _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { return *__cstr == _CharT('\0'); } template _LIBCPP_HIDE_FROM_ABI void __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { back_insert_iterator __out_it{__str}; std::ranges::copy(__prefix, __nul_terminator{}, __out_it); char __buffer[8]; to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); __str += _CharT('}'); } // [format.string.escaped]/2.2.1.2 // ... // then the sequence \u{hex-digit-sequence} is appended to E, where // hex-digit-sequence is the shortest hexadecimal representation of C using // lower-case hexadecimal digits. template _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); } // [format.string.escaped]/2.2.3 // Otherwise (X is a sequence of ill-formed code units), each code unit U is // appended to E in order as the sequence \x{hex-digit-sequence}, where // hex-digit-sequence is the shortest hexadecimal representation of U using // lower-case hexadecimal digits. 
template _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); } template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { # ifdef _LIBCPP_HAS_NO_UNICODE // For ASCII assume everything above 127 is printable. if (__value > 127) return false; # endif if (!__escaped_output_table::__needs_escape(__value)) return false; __formatter::__write_well_formed_escaped_code_unit(__str, __value); return true; } template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { return static_cast>(__value); } enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote }; // [format.string.escaped]/2 template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] switch (__value) { case _CharT('\t'): __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); return true; case _CharT('\n'): __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); return true; case _CharT('\r'): __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); return true; case _CharT('\''): if (__mark == __escape_quotation_mark::__apostrophe) __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); else __str += __value; return true; case _CharT('"'): if (__mark == __escape_quotation_mark::__double_quote) __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); else __str += __value; return true; case _CharT('\\'): __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); return true; // 2.2.1.2 - Space case _CharT(' '): __str += __value; return true; } // 2.2.2 // Otherwise, if X is a shift sequence, the effect on E and further // decoding of S is unspecified. 
// For now shift sequences are ignored and treated as Unicode. Other parts // of the format library do the same. It's unknown how ostream treats them. // TODO FMT determine what to do with shift sequences. // 2.2.1.2.1 and 2.2.1.2.2 - Escape return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); } template _LIBCPP_HIDE_FROM_ABI void __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; while (!__view.__at_end()) { auto __first = __view.__position(); typename __unicode::__consume_result __result = __view.__consume(); if (__result.__status == __unicode::__consume_result::__ok) { if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) // 2.2.1.3 - Add the character ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); } else { // 2.2.3 sequence of ill-formed code units ranges::for_each(__first, __view.__position(), [&](_CharT __value) { __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); }); } } } template _LIBCPP_HIDE_FROM_ABI auto __format_escaped_char(_CharT __value, output_iterator auto __out_it, __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { basic_string<_CharT> __str; __str += _CharT('\''); __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); __str += _CharT('\''); return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size()); } template _LIBCPP_HIDE_FROM_ABI auto __format_escaped_string(basic_string_view<_CharT> __values, output_iterator auto __out_it, __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { basic_string<_CharT> __str; __str += _CharT('"'); __formatter::__escape(__str, __values, 
__escape_quotation_mark::__double_quote); __str += _CharT('"'); return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs); } # endif // _LIBCPP_STD_VER >= 23 } // namespace __formatter _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H