1 //===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/TableGen/StringToOffsetTable.h" 10 #include "llvm/Support/FormatVariadic.h" 11 #include "llvm/Support/raw_ostream.h" 12 #include "llvm/TableGen/Error.h" 13 #include "llvm/TableGen/Main.h" 14 15 using namespace llvm; 16 17 unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) { 18 auto [II, Inserted] = StringOffset.insert({Str, size()}); 19 if (Inserted) { 20 // Add the string to the aggregate if this is the first time found. 21 AggregateString.append(Str.begin(), Str.end()); 22 if (AppendZero) 23 AggregateString += '\0'; 24 } 25 26 return II->second; 27 } 28 29 void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, 30 const Twine &Name) const { 31 // This generates a `llvm::StringTable` which expects that entries are null 32 // terminated. So fail with an error if `AppendZero` is false. 33 if (!AppendZero) 34 PrintFatalError("llvm::StringTable requires null terminated strings"); 35 36 OS << formatv(R"( 37 #ifdef __GNUC__ 38 #pragma GCC diagnostic push 39 #pragma GCC diagnostic ignored "-Woverlength-strings" 40 #endif 41 static constexpr char {}Storage[] = )", 42 Name); 43 44 // MSVC silently miscompiles string literals longer than 64k in some 45 // circumstances. The build system sets EmitLongStrLiterals to false when it 46 // detects that it is targetting MSVC. When that option is false and the 47 // string table is longer than 64k, emit it as an array of character 48 // literals. 49 bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024); 50 OS << (UseChars ? "{\n" : "\n"); 51 52 ListSeparator LineSep(UseChars ? ",\n" : "\n"); 53 SmallVector<StringRef> Strings(split(AggregateString, '\0')); 54 // We should always have an empty string at the start, and because these are 55 // null terminators rather than separators, we'll have one at the end as 56 // well. Skip the end one. 57 assert(Strings.front().empty() && "Expected empty initial string!"); 58 assert(Strings.back().empty() && 59 "Expected empty string at the end due to terminators!"); 60 Strings.pop_back(); 61 for (StringRef Str : Strings) { 62 OS << LineSep << " "; 63 // If we can, just emit this as a string literal to be concatenated. 64 if (!UseChars) { 65 OS << "\""; 66 OS.write_escaped(Str); 67 OS << "\\0\""; 68 continue; 69 } 70 71 ListSeparator CharSep(", "); 72 for (char C : Str) { 73 OS << CharSep << "'"; 74 OS.write_escaped(StringRef(&C, 1)); 75 OS << "'"; 76 } 77 OS << CharSep << "'\\0'"; 78 } 79 OS << LineSep << (UseChars ? "};" : " ;"); 80 81 OS << formatv(R"( 82 #ifdef __GNUC__ 83 #pragma GCC diagnostic pop 84 #endif 85 86 static constexpr llvm::StringTable 87 {0} = {0}Storage; 88 )", 89 Name); 90 } 91 92 void StringToOffsetTable::EmitString(raw_ostream &O) const { 93 // Escape the string. 94 SmallString<256> EscapedStr; 95 raw_svector_ostream(EscapedStr).write_escaped(AggregateString); 96 97 O << " \""; 98 unsigned CharsPrinted = 0; 99 for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { 100 if (CharsPrinted > 70) { 101 O << "\"\n \""; 102 CharsPrinted = 0; 103 } 104 O << EscapedStr[i]; 105 ++CharsPrinted; 106 107 // Print escape sequences all together. 108 if (EscapedStr[i] != '\\') 109 continue; 110 111 assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!"); 112 if (isDigit(EscapedStr[i + 1])) { 113 assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && 114 "Expected 3 digit octal escape!"); 115 O << EscapedStr[++i]; 116 O << EscapedStr[++i]; 117 O << EscapedStr[++i]; 118 CharsPrinted += 3; 119 } else { 120 O << EscapedStr[++i]; 121 ++CharsPrinted; 122 } 123 } 124 O << "\""; 125 } 126