1*700637cbSDimitry Andric //===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric
9*700637cbSDimitry Andric #include "llvm/TableGen/StringToOffsetTable.h"
10*700637cbSDimitry Andric #include "llvm/Support/FormatVariadic.h"
11*700637cbSDimitry Andric #include "llvm/Support/raw_ostream.h"
12*700637cbSDimitry Andric #include "llvm/TableGen/Error.h"
13*700637cbSDimitry Andric #include "llvm/TableGen/Main.h"
14*700637cbSDimitry Andric
15*700637cbSDimitry Andric using namespace llvm;
16*700637cbSDimitry Andric
GetOrAddStringOffset(StringRef Str)17*700637cbSDimitry Andric unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
18*700637cbSDimitry Andric auto [II, Inserted] = StringOffset.insert({Str, size()});
19*700637cbSDimitry Andric if (Inserted) {
20*700637cbSDimitry Andric // Add the string to the aggregate if this is the first time found.
21*700637cbSDimitry Andric AggregateString.append(Str.begin(), Str.end());
22*700637cbSDimitry Andric if (AppendZero)
23*700637cbSDimitry Andric AggregateString += '\0';
24*700637cbSDimitry Andric }
25*700637cbSDimitry Andric
26*700637cbSDimitry Andric return II->second;
27*700637cbSDimitry Andric }
28*700637cbSDimitry Andric
EmitStringTableDef(raw_ostream & OS,const Twine & Name) const29*700637cbSDimitry Andric void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
30*700637cbSDimitry Andric const Twine &Name) const {
31*700637cbSDimitry Andric // This generates a `llvm::StringTable` which expects that entries are null
32*700637cbSDimitry Andric // terminated. So fail with an error if `AppendZero` is false.
33*700637cbSDimitry Andric if (!AppendZero)
34*700637cbSDimitry Andric PrintFatalError("llvm::StringTable requires null terminated strings");
35*700637cbSDimitry Andric
36*700637cbSDimitry Andric OS << formatv(R"(
37*700637cbSDimitry Andric #ifdef __GNUC__
38*700637cbSDimitry Andric #pragma GCC diagnostic push
39*700637cbSDimitry Andric #pragma GCC diagnostic ignored "-Woverlength-strings"
40*700637cbSDimitry Andric #endif
41*700637cbSDimitry Andric static constexpr char {}Storage[] = )",
42*700637cbSDimitry Andric Name);
43*700637cbSDimitry Andric
44*700637cbSDimitry Andric // MSVC silently miscompiles string literals longer than 64k in some
45*700637cbSDimitry Andric // circumstances. The build system sets EmitLongStrLiterals to false when it
46*700637cbSDimitry Andric // detects that it is targetting MSVC. When that option is false and the
47*700637cbSDimitry Andric // string table is longer than 64k, emit it as an array of character
48*700637cbSDimitry Andric // literals.
49*700637cbSDimitry Andric bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
50*700637cbSDimitry Andric OS << (UseChars ? "{\n" : "\n");
51*700637cbSDimitry Andric
52*700637cbSDimitry Andric ListSeparator LineSep(UseChars ? ",\n" : "\n");
53*700637cbSDimitry Andric SmallVector<StringRef> Strings(split(AggregateString, '\0'));
54*700637cbSDimitry Andric // We should always have an empty string at the start, and because these are
55*700637cbSDimitry Andric // null terminators rather than separators, we'll have one at the end as
56*700637cbSDimitry Andric // well. Skip the end one.
57*700637cbSDimitry Andric assert(Strings.front().empty() && "Expected empty initial string!");
58*700637cbSDimitry Andric assert(Strings.back().empty() &&
59*700637cbSDimitry Andric "Expected empty string at the end due to terminators!");
60*700637cbSDimitry Andric Strings.pop_back();
61*700637cbSDimitry Andric for (StringRef Str : Strings) {
62*700637cbSDimitry Andric OS << LineSep << " ";
63*700637cbSDimitry Andric // If we can, just emit this as a string literal to be concatenated.
64*700637cbSDimitry Andric if (!UseChars) {
65*700637cbSDimitry Andric OS << "\"";
66*700637cbSDimitry Andric OS.write_escaped(Str);
67*700637cbSDimitry Andric OS << "\\0\"";
68*700637cbSDimitry Andric continue;
69*700637cbSDimitry Andric }
70*700637cbSDimitry Andric
71*700637cbSDimitry Andric ListSeparator CharSep(", ");
72*700637cbSDimitry Andric for (char C : Str) {
73*700637cbSDimitry Andric OS << CharSep << "'";
74*700637cbSDimitry Andric OS.write_escaped(StringRef(&C, 1));
75*700637cbSDimitry Andric OS << "'";
76*700637cbSDimitry Andric }
77*700637cbSDimitry Andric OS << CharSep << "'\\0'";
78*700637cbSDimitry Andric }
79*700637cbSDimitry Andric OS << LineSep << (UseChars ? "};" : " ;");
80*700637cbSDimitry Andric
81*700637cbSDimitry Andric OS << formatv(R"(
82*700637cbSDimitry Andric #ifdef __GNUC__
83*700637cbSDimitry Andric #pragma GCC diagnostic pop
84*700637cbSDimitry Andric #endif
85*700637cbSDimitry Andric
86*700637cbSDimitry Andric static constexpr llvm::StringTable
87*700637cbSDimitry Andric {0} = {0}Storage;
88*700637cbSDimitry Andric )",
89*700637cbSDimitry Andric Name);
90*700637cbSDimitry Andric }
91*700637cbSDimitry Andric
EmitString(raw_ostream & O) const92*700637cbSDimitry Andric void StringToOffsetTable::EmitString(raw_ostream &O) const {
93*700637cbSDimitry Andric // Escape the string.
94*700637cbSDimitry Andric SmallString<256> EscapedStr;
95*700637cbSDimitry Andric raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
96*700637cbSDimitry Andric
97*700637cbSDimitry Andric O << " \"";
98*700637cbSDimitry Andric unsigned CharsPrinted = 0;
99*700637cbSDimitry Andric for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
100*700637cbSDimitry Andric if (CharsPrinted > 70) {
101*700637cbSDimitry Andric O << "\"\n \"";
102*700637cbSDimitry Andric CharsPrinted = 0;
103*700637cbSDimitry Andric }
104*700637cbSDimitry Andric O << EscapedStr[i];
105*700637cbSDimitry Andric ++CharsPrinted;
106*700637cbSDimitry Andric
107*700637cbSDimitry Andric // Print escape sequences all together.
108*700637cbSDimitry Andric if (EscapedStr[i] != '\\')
109*700637cbSDimitry Andric continue;
110*700637cbSDimitry Andric
111*700637cbSDimitry Andric assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
112*700637cbSDimitry Andric if (isDigit(EscapedStr[i + 1])) {
113*700637cbSDimitry Andric assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
114*700637cbSDimitry Andric "Expected 3 digit octal escape!");
115*700637cbSDimitry Andric O << EscapedStr[++i];
116*700637cbSDimitry Andric O << EscapedStr[++i];
117*700637cbSDimitry Andric O << EscapedStr[++i];
118*700637cbSDimitry Andric CharsPrinted += 3;
119*700637cbSDimitry Andric } else {
120*700637cbSDimitry Andric O << EscapedStr[++i];
121*700637cbSDimitry Andric ++CharsPrinted;
122*700637cbSDimitry Andric }
123*700637cbSDimitry Andric }
124*700637cbSDimitry Andric O << "\"";
125*700637cbSDimitry Andric }
126