1*0b57cec5SDimitry Andric //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This tablegen backend emits an efficient function to translate HTML named 10*0b57cec5SDimitry Andric // character references to UTF-8 sequences. 11*0b57cec5SDimitry Andric // 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h" 15*0b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h" 16*0b57cec5SDimitry Andric #include "llvm/TableGen/Error.h" 17*0b57cec5SDimitry Andric #include "llvm/TableGen/Record.h" 18*0b57cec5SDimitry Andric #include "llvm/TableGen/StringMatcher.h" 19*0b57cec5SDimitry Andric #include "llvm/TableGen/TableGenBackend.h" 20*0b57cec5SDimitry Andric #include <vector> 21*0b57cec5SDimitry Andric 22*0b57cec5SDimitry Andric using namespace llvm; 23*0b57cec5SDimitry Andric 24*0b57cec5SDimitry Andric /// Convert a code point to the corresponding UTF-8 sequence represented 25*0b57cec5SDimitry Andric /// as a C string literal. 26*0b57cec5SDimitry Andric /// 27*0b57cec5SDimitry Andric /// \returns true on success. 28*0b57cec5SDimitry Andric static bool translateCodePointToUTF8(unsigned CodePoint, 29*0b57cec5SDimitry Andric SmallVectorImpl<char> &CLiteral) { 30*0b57cec5SDimitry Andric char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; 31*0b57cec5SDimitry Andric char *TranslatedPtr = Translated; 32*0b57cec5SDimitry Andric if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) 33*0b57cec5SDimitry Andric return false; 34*0b57cec5SDimitry Andric 35*0b57cec5SDimitry Andric StringRef UTF8(Translated, TranslatedPtr - Translated); 36*0b57cec5SDimitry Andric 37*0b57cec5SDimitry Andric raw_svector_ostream OS(CLiteral); 38*0b57cec5SDimitry Andric OS << "\""; 39*0b57cec5SDimitry Andric for (size_t i = 0, e = UTF8.size(); i != e; ++i) { 40*0b57cec5SDimitry Andric OS << "\\x"; 41*0b57cec5SDimitry Andric OS.write_hex(static_cast<unsigned char>(UTF8[i])); 42*0b57cec5SDimitry Andric } 43*0b57cec5SDimitry Andric OS << "\""; 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric return true; 46*0b57cec5SDimitry Andric } 47*0b57cec5SDimitry Andric 48*0b57cec5SDimitry Andric namespace clang { 49*0b57cec5SDimitry Andric void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, 50*0b57cec5SDimitry Andric raw_ostream &OS) { 51*0b57cec5SDimitry Andric std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR"); 52*0b57cec5SDimitry Andric std::vector<StringMatcher::StringPair> NameToUTF8; 53*0b57cec5SDimitry Andric SmallString<32> CLiteral; 54*0b57cec5SDimitry Andric for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end(); 55*0b57cec5SDimitry Andric I != E; ++I) { 56*0b57cec5SDimitry Andric Record &Tag = **I; 57*0b57cec5SDimitry Andric std::string Spelling = Tag.getValueAsString("Spelling"); 58*0b57cec5SDimitry Andric uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); 59*0b57cec5SDimitry Andric CLiteral.clear(); 60*0b57cec5SDimitry Andric CLiteral.append("return "); 61*0b57cec5SDimitry Andric if (!translateCodePointToUTF8(CodePoint, CLiteral)) { 62*0b57cec5SDimitry Andric SrcMgr.PrintMessage(Tag.getLoc().front(), 63*0b57cec5SDimitry Andric SourceMgr::DK_Error, 64*0b57cec5SDimitry Andric Twine("invalid code point")); 65*0b57cec5SDimitry Andric continue; 66*0b57cec5SDimitry Andric } 67*0b57cec5SDimitry Andric CLiteral.append(";"); 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric StringMatcher::StringPair Match(Spelling, CLiteral.str()); 70*0b57cec5SDimitry Andric NameToUTF8.push_back(Match); 71*0b57cec5SDimitry Andric } 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric emitSourceFileHeader("HTML named character reference to UTF-8 " 74*0b57cec5SDimitry Andric "translation", OS); 75*0b57cec5SDimitry Andric 76*0b57cec5SDimitry Andric OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" 77*0b57cec5SDimitry Andric " StringRef Name) {\n"; 78*0b57cec5SDimitry Andric StringMatcher("Name", NameToUTF8, OS).Emit(); 79*0b57cec5SDimitry Andric OS << " return StringRef();\n" 80*0b57cec5SDimitry Andric << "}\n\n"; 81*0b57cec5SDimitry Andric } 82*0b57cec5SDimitry Andric 83*0b57cec5SDimitry Andric } // end namespace clang 84*0b57cec5SDimitry Andric 85