xref: /freebsd/contrib/llvm-project/llvm/lib/TableGen/StringToOffsetTable.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric 
9*700637cbSDimitry Andric #include "llvm/TableGen/StringToOffsetTable.h"
10*700637cbSDimitry Andric #include "llvm/Support/FormatVariadic.h"
11*700637cbSDimitry Andric #include "llvm/Support/raw_ostream.h"
12*700637cbSDimitry Andric #include "llvm/TableGen/Error.h"
13*700637cbSDimitry Andric #include "llvm/TableGen/Main.h"
14*700637cbSDimitry Andric 
15*700637cbSDimitry Andric using namespace llvm;
16*700637cbSDimitry Andric 
GetOrAddStringOffset(StringRef Str)17*700637cbSDimitry Andric unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
18*700637cbSDimitry Andric   auto [II, Inserted] = StringOffset.insert({Str, size()});
19*700637cbSDimitry Andric   if (Inserted) {
20*700637cbSDimitry Andric     // Add the string to the aggregate if this is the first time found.
21*700637cbSDimitry Andric     AggregateString.append(Str.begin(), Str.end());
22*700637cbSDimitry Andric     if (AppendZero)
23*700637cbSDimitry Andric       AggregateString += '\0';
24*700637cbSDimitry Andric   }
25*700637cbSDimitry Andric 
26*700637cbSDimitry Andric   return II->second;
27*700637cbSDimitry Andric }
28*700637cbSDimitry Andric 
EmitStringTableDef(raw_ostream & OS,const Twine & Name) const29*700637cbSDimitry Andric void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
30*700637cbSDimitry Andric                                              const Twine &Name) const {
31*700637cbSDimitry Andric   // This generates a `llvm::StringTable` which expects that entries are null
32*700637cbSDimitry Andric   // terminated. So fail with an error if `AppendZero` is false.
33*700637cbSDimitry Andric   if (!AppendZero)
34*700637cbSDimitry Andric     PrintFatalError("llvm::StringTable requires null terminated strings");
35*700637cbSDimitry Andric 
36*700637cbSDimitry Andric   OS << formatv(R"(
37*700637cbSDimitry Andric #ifdef __GNUC__
38*700637cbSDimitry Andric #pragma GCC diagnostic push
39*700637cbSDimitry Andric #pragma GCC diagnostic ignored "-Woverlength-strings"
40*700637cbSDimitry Andric #endif
41*700637cbSDimitry Andric static constexpr char {}Storage[] = )",
42*700637cbSDimitry Andric                 Name);
43*700637cbSDimitry Andric 
44*700637cbSDimitry Andric   // MSVC silently miscompiles string literals longer than 64k in some
45*700637cbSDimitry Andric   // circumstances. The build system sets EmitLongStrLiterals to false when it
46*700637cbSDimitry Andric   // detects that it is targetting MSVC. When that option is false and the
47*700637cbSDimitry Andric   // string table is longer than 64k, emit it as an array of character
48*700637cbSDimitry Andric   // literals.
49*700637cbSDimitry Andric   bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
50*700637cbSDimitry Andric   OS << (UseChars ? "{\n" : "\n");
51*700637cbSDimitry Andric 
52*700637cbSDimitry Andric   ListSeparator LineSep(UseChars ? ",\n" : "\n");
53*700637cbSDimitry Andric   SmallVector<StringRef> Strings(split(AggregateString, '\0'));
54*700637cbSDimitry Andric   // We should always have an empty string at the start, and because these are
55*700637cbSDimitry Andric   // null terminators rather than separators, we'll have one at the end as
56*700637cbSDimitry Andric   // well. Skip the end one.
57*700637cbSDimitry Andric   assert(Strings.front().empty() && "Expected empty initial string!");
58*700637cbSDimitry Andric   assert(Strings.back().empty() &&
59*700637cbSDimitry Andric          "Expected empty string at the end due to terminators!");
60*700637cbSDimitry Andric   Strings.pop_back();
61*700637cbSDimitry Andric   for (StringRef Str : Strings) {
62*700637cbSDimitry Andric     OS << LineSep << "  ";
63*700637cbSDimitry Andric     // If we can, just emit this as a string literal to be concatenated.
64*700637cbSDimitry Andric     if (!UseChars) {
65*700637cbSDimitry Andric       OS << "\"";
66*700637cbSDimitry Andric       OS.write_escaped(Str);
67*700637cbSDimitry Andric       OS << "\\0\"";
68*700637cbSDimitry Andric       continue;
69*700637cbSDimitry Andric     }
70*700637cbSDimitry Andric 
71*700637cbSDimitry Andric     ListSeparator CharSep(", ");
72*700637cbSDimitry Andric     for (char C : Str) {
73*700637cbSDimitry Andric       OS << CharSep << "'";
74*700637cbSDimitry Andric       OS.write_escaped(StringRef(&C, 1));
75*700637cbSDimitry Andric       OS << "'";
76*700637cbSDimitry Andric     }
77*700637cbSDimitry Andric     OS << CharSep << "'\\0'";
78*700637cbSDimitry Andric   }
79*700637cbSDimitry Andric   OS << LineSep << (UseChars ? "};" : "  ;");
80*700637cbSDimitry Andric 
81*700637cbSDimitry Andric   OS << formatv(R"(
82*700637cbSDimitry Andric #ifdef __GNUC__
83*700637cbSDimitry Andric #pragma GCC diagnostic pop
84*700637cbSDimitry Andric #endif
85*700637cbSDimitry Andric 
86*700637cbSDimitry Andric static constexpr llvm::StringTable
87*700637cbSDimitry Andric {0} = {0}Storage;
88*700637cbSDimitry Andric )",
89*700637cbSDimitry Andric                 Name);
90*700637cbSDimitry Andric }
91*700637cbSDimitry Andric 
EmitString(raw_ostream & O) const92*700637cbSDimitry Andric void StringToOffsetTable::EmitString(raw_ostream &O) const {
93*700637cbSDimitry Andric   // Escape the string.
94*700637cbSDimitry Andric   SmallString<256> EscapedStr;
95*700637cbSDimitry Andric   raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
96*700637cbSDimitry Andric 
97*700637cbSDimitry Andric   O << "    \"";
98*700637cbSDimitry Andric   unsigned CharsPrinted = 0;
99*700637cbSDimitry Andric   for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
100*700637cbSDimitry Andric     if (CharsPrinted > 70) {
101*700637cbSDimitry Andric       O << "\"\n    \"";
102*700637cbSDimitry Andric       CharsPrinted = 0;
103*700637cbSDimitry Andric     }
104*700637cbSDimitry Andric     O << EscapedStr[i];
105*700637cbSDimitry Andric     ++CharsPrinted;
106*700637cbSDimitry Andric 
107*700637cbSDimitry Andric     // Print escape sequences all together.
108*700637cbSDimitry Andric     if (EscapedStr[i] != '\\')
109*700637cbSDimitry Andric       continue;
110*700637cbSDimitry Andric 
111*700637cbSDimitry Andric     assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
112*700637cbSDimitry Andric     if (isDigit(EscapedStr[i + 1])) {
113*700637cbSDimitry Andric       assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
114*700637cbSDimitry Andric              "Expected 3 digit octal escape!");
115*700637cbSDimitry Andric       O << EscapedStr[++i];
116*700637cbSDimitry Andric       O << EscapedStr[++i];
117*700637cbSDimitry Andric       O << EscapedStr[++i];
118*700637cbSDimitry Andric       CharsPrinted += 3;
119*700637cbSDimitry Andric     } else {
120*700637cbSDimitry Andric       O << EscapedStr[++i];
121*700637cbSDimitry Andric       ++CharsPrinted;
122*700637cbSDimitry Andric     }
123*700637cbSDimitry Andric   }
124*700637cbSDimitry Andric   O << "\"";
125*700637cbSDimitry Andric }
126