1 //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This tablegen backend emits an efficient function to translate HTML named
10 // character references to UTF-8 sequences.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/SmallString.h"
15 #include "llvm/Support/ConvertUTF.h"
16 #include "llvm/TableGen/Error.h"
17 #include "llvm/TableGen/Record.h"
18 #include "llvm/TableGen/StringMatcher.h"
19 #include "llvm/TableGen/TableGenBackend.h"
20 #include <vector>
21 
22 using namespace llvm;
23 
24 /// Convert a code point to the corresponding UTF-8 sequence represented
25 /// as a C string literal.
26 ///
27 /// \returns true on success.
28 static bool translateCodePointToUTF8(unsigned CodePoint,
29                                      SmallVectorImpl<char> &CLiteral) {
30   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
31   char *TranslatedPtr = Translated;
32   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
33     return false;
34 
35   StringRef UTF8(Translated, TranslatedPtr - Translated);
36 
37   raw_svector_ostream OS(CLiteral);
38   OS << "\"";
39   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
40     OS << "\\x";
41     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
42   }
43   OS << "\"";
44 
45   return true;
46 }
47 
48 namespace clang {
49 void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
50                                                   raw_ostream &OS) {
51   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
52   std::vector<StringMatcher::StringPair> NameToUTF8;
53   SmallString<32> CLiteral;
54   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
55        I != E; ++I) {
56     Record &Tag = **I;
57     std::string Spelling = Tag.getValueAsString("Spelling");
58     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
59     CLiteral.clear();
60     CLiteral.append("return ");
61     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
62       SrcMgr.PrintMessage(Tag.getLoc().front(),
63                           SourceMgr::DK_Error,
64                           Twine("invalid code point"));
65       continue;
66     }
67     CLiteral.append(";");
68 
69     StringMatcher::StringPair Match(Spelling, CLiteral.str());
70     NameToUTF8.push_back(Match);
71   }
72 
73   emitSourceFileHeader("HTML named character reference to UTF-8 "
74                        "translation", OS);
75 
76   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
77         "                                             StringRef Name) {\n";
78   StringMatcher("Name", NameToUTF8, OS).Emit();
79   OS << "  return StringRef();\n"
80      << "}\n\n";
81 }
82 
83 } // end namespace clang
84 
85