//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits an efficient function to translate HTML named
// character references to UTF-8 sequences.
//
//===----------------------------------------------------------------------===//

#include "TableGenBackends.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <vector>

using namespace llvm;

/// Convert a code point to the corresponding UTF-8 sequence represented
/// as a C string literal.
///
/// \returns true on success.
290b57cec5SDimitry Andric static bool translateCodePointToUTF8(unsigned CodePoint, 300b57cec5SDimitry Andric SmallVectorImpl<char> &CLiteral) { 310b57cec5SDimitry Andric char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; 320b57cec5SDimitry Andric char *TranslatedPtr = Translated; 330b57cec5SDimitry Andric if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) 340b57cec5SDimitry Andric return false; 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric StringRef UTF8(Translated, TranslatedPtr - Translated); 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric raw_svector_ostream OS(CLiteral); 390b57cec5SDimitry Andric OS << "\""; 400b57cec5SDimitry Andric for (size_t i = 0, e = UTF8.size(); i != e; ++i) { 410b57cec5SDimitry Andric OS << "\\x"; 420b57cec5SDimitry Andric OS.write_hex(static_cast<unsigned char>(UTF8[i])); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric OS << "\""; 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric return true; 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric 49a7dea167SDimitry Andric void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, 500b57cec5SDimitry Andric raw_ostream &OS) { 510b57cec5SDimitry Andric std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR"); 520b57cec5SDimitry Andric std::vector<StringMatcher::StringPair> NameToUTF8; 530b57cec5SDimitry Andric SmallString<32> CLiteral; 540b57cec5SDimitry Andric for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end(); 550b57cec5SDimitry Andric I != E; ++I) { 560b57cec5SDimitry Andric Record &Tag = **I; 575ffd83dbSDimitry Andric std::string Spelling = std::string(Tag.getValueAsString("Spelling")); 580b57cec5SDimitry Andric uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); 590b57cec5SDimitry Andric CLiteral.clear(); 600b57cec5SDimitry Andric CLiteral.append("return "); 610b57cec5SDimitry Andric if (!translateCodePointToUTF8(CodePoint, CLiteral)) { 620b57cec5SDimitry Andric SrcMgr.PrintMessage(Tag.getLoc().front(), 
630b57cec5SDimitry Andric SourceMgr::DK_Error, 640b57cec5SDimitry Andric Twine("invalid code point")); 650b57cec5SDimitry Andric continue; 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric CLiteral.append(";"); 680b57cec5SDimitry Andric 69*7a6dacacSDimitry Andric StringMatcher::StringPair Match(Spelling, std::string(CLiteral)); 700b57cec5SDimitry Andric NameToUTF8.push_back(Match); 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 735f757f3fSDimitry Andric emitSourceFileHeader("HTML named character reference to UTF-8 translation", 745f757f3fSDimitry Andric OS, Records); 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" 770b57cec5SDimitry Andric " StringRef Name) {\n"; 780b57cec5SDimitry Andric StringMatcher("Name", NameToUTF8, OS).Emit(); 790b57cec5SDimitry Andric OS << " return StringRef();\n" 800b57cec5SDimitry Andric << "}\n\n"; 810b57cec5SDimitry Andric } 82