1 //===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TABLEGEN_STRINGTOOFFSETTABLE_H 10 #define LLVM_TABLEGEN_STRINGTOOFFSETTABLE_H 11 12 #include "llvm/ADT/SmallString.h" 13 #include "llvm/ADT/StringExtras.h" 14 #include "llvm/ADT/StringMap.h" 15 #include <optional> 16 17 namespace llvm { 18 19 /// StringToOffsetTable - This class uniques a bunch of nul-terminated strings 20 /// and keeps track of their offset in a massive contiguous string allocation. 21 /// It can then output this string blob and use indexes into the string to 22 /// reference each piece. 23 class StringToOffsetTable { 24 StringMap<unsigned> StringOffset; 25 std::string AggregateString; 26 const bool AppendZero; 27 28 public: AppendZero(AppendZero)29 StringToOffsetTable(bool AppendZero = true) : AppendZero(AppendZero) { 30 // Ensure we always put the empty string at offset zero. That lets empty 31 // initialization also be zero initialization for offsets into the table. 32 GetOrAddStringOffset(""); 33 } 34 empty()35 bool empty() const { return StringOffset.empty(); } size()36 size_t size() const { return AggregateString.size(); } 37 38 unsigned GetOrAddStringOffset(StringRef Str); 39 40 // Returns the offset of `Str` in the table if its preset, else return 41 // std::nullopt. GetStringOffset(StringRef Str)42 std::optional<unsigned> GetStringOffset(StringRef Str) const { 43 auto II = StringOffset.find(Str); 44 if (II == StringOffset.end()) 45 return std::nullopt; 46 return II->second; 47 } 48 49 // Emit a string table definition with the provided name. 50 // 51 // When possible, this uses string-literal concatenation to emit the string 52 // contents in a readable and searchable way. However, for (very) large string 53 // tables MSVC cannot reliably use string literals and so there we use a large 54 // character array. We still use a line oriented emission and add comments to 55 // provide searchability even in this case. 56 // 57 // The string table, and its input string contents, are always emitted as both 58 // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be 59 // valid identifiers to declare. 60 void EmitStringTableDef(raw_ostream &OS, const Twine &Name) const; 61 62 // Emit the string as one single string. 63 void EmitString(raw_ostream &O) const; 64 }; 65 66 } // end namespace llvm 67 68 #endif 69