1 //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend emits an efficient function to translate HTML named 10 // character references to UTF-8 sequences. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/SmallString.h" 15 #include "llvm/Support/ConvertUTF.h" 16 #include "llvm/TableGen/Error.h" 17 #include "llvm/TableGen/Record.h" 18 #include "llvm/TableGen/StringMatcher.h" 19 #include "llvm/TableGen/TableGenBackend.h" 20 #include <vector> 21 22 using namespace llvm; 23 24 /// Convert a code point to the corresponding UTF-8 sequence represented 25 /// as a C string literal. 26 /// 27 /// \returns true on success. 28 static bool translateCodePointToUTF8(unsigned CodePoint, 29 SmallVectorImpl<char> &CLiteral) { 30 char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; 31 char *TranslatedPtr = Translated; 32 if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) 33 return false; 34 35 StringRef UTF8(Translated, TranslatedPtr - Translated); 36 37 raw_svector_ostream OS(CLiteral); 38 OS << "\""; 39 for (size_t i = 0, e = UTF8.size(); i != e; ++i) { 40 OS << "\\x"; 41 OS.write_hex(static_cast<unsigned char>(UTF8[i])); 42 } 43 OS << "\""; 44 45 return true; 46 } 47 48 namespace clang { 49 void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, 50 raw_ostream &OS) { 51 std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR"); 52 std::vector<StringMatcher::StringPair> NameToUTF8; 53 SmallString<32> CLiteral; 54 for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end(); 55 I != E; ++I) { 56 Record &Tag = **I; 57 std::string Spelling = Tag.getValueAsString("Spelling"); 58 uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); 59 CLiteral.clear(); 60 CLiteral.append("return "); 61 if (!translateCodePointToUTF8(CodePoint, CLiteral)) { 62 SrcMgr.PrintMessage(Tag.getLoc().front(), 63 SourceMgr::DK_Error, 64 Twine("invalid code point")); 65 continue; 66 } 67 CLiteral.append(";"); 68 69 StringMatcher::StringPair Match(Spelling, CLiteral.str()); 70 NameToUTF8.push_back(Match); 71 } 72 73 emitSourceFileHeader("HTML named character reference to UTF-8 " 74 "translation", OS); 75 76 OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" 77 " StringRef Name) {\n"; 78 StringMatcher("Name", NameToUTF8, OS).Emit(); 79 OS << " return StringRef();\n" 80 << "}\n\n"; 81 } 82 83 } // end namespace clang 84 85