1 //===- StringTable.h - Table of strings tracked by offset ----------C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_ADT_STRING_TABLE_H 10 #define LLVM_ADT_STRING_TABLE_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/ADT/iterator.h" 14 #include <iterator> 15 #include <limits> 16 17 namespace llvm { 18 19 /// A table of densely packed, null-terminated strings indexed by offset. 20 /// 21 /// This table abstracts a densely concatenated list of null-terminated strings, 22 /// each of which can be referenced using an offset into the table. 23 /// 24 /// This requires and ensures that the string at offset 0 is also the empty 25 /// string. This helps allow zero-initialized offsets form empty strings and 26 /// avoids non-zero initialization when using a string literal pointer would 27 /// allow a null pointer. 28 /// 29 /// The primary use case is having a single global string literal for the table 30 /// contents, and offsets into it in other global data structures to avoid 31 /// dynamic relocations of individual string literal pointers in those global 32 /// data structures. 33 class StringTable { 34 StringRef Table; 35 36 public: 37 // An offset into one of these packed string tables, used to select a string 38 // within the table. 39 // 40 // Typically these are created by TableGen or other code generator from 41 // computed offsets, and it just wraps that integer into a type until it is 42 // used with the relevant table. 43 // 44 // We also ensure that the empty string is at offset zero and default 45 // constructing this class gives you an offset of zero. This makes default 46 // constructing this type work similarly (after indexing the table) to default 47 // constructing a `StringRef`. 48 class Offset { 49 // Note that we ensure the empty string is at offset zero. 50 unsigned Value = 0; 51 52 public: 53 constexpr Offset() = default; Offset(unsigned Value)54 constexpr Offset(unsigned Value) : Value(Value) {} 55 56 friend constexpr bool operator==(const Offset &LHS, const Offset &RHS) { 57 return LHS.Value == RHS.Value; 58 } 59 60 friend constexpr bool operator!=(const Offset &LHS, const Offset &RHS) { 61 return LHS.Value != RHS.Value; 62 } 63 value()64 constexpr unsigned value() const { return Value; } 65 }; 66 67 // We directly handle string literals with a templated converting constructor 68 // because we *don't* want to do `strlen` on them -- we fully expect null 69 // bytes in this input. This is somewhat the opposite of how `StringLiteral` 70 // works. 71 template <size_t N> StringTable(const char (& RawTable)[N])72 constexpr StringTable(const char (&RawTable)[N]) : Table(RawTable, N) { 73 static_assert(N <= std::numeric_limits<unsigned>::max(), 74 "We only support table sizes that can be indexed by an " 75 "`unsigned` offset."); 76 77 // Note that we can only use `empty`, `data`, and `size` in these asserts to 78 // support `constexpr`. 79 assert(!Table.empty() && "Requires at least a valid empty string."); 80 assert(Table.data()[0] == '\0' && "Offset zero must be the empty string."); 81 // Regardless of how many strings are in the table, the last one should also 82 // be null terminated. This also ensures that computing `strlen` on the 83 // strings can't accidentally run past the end of the table. 84 assert(Table.data()[Table.size() - 1] == '\0' && 85 "Last byte must be a null byte."); 86 } 87 88 // Get a string from the table starting with the provided offset. The returned 89 // `StringRef` is in fact null terminated, and so can be converted safely to a 90 // C-string if necessary for a system API. 91 constexpr StringRef operator[](Offset O) const { 92 assert(O.value() < Table.size() && "Out of bounds offset!"); 93 return Table.data() + O.value(); 94 } 95 96 /// Returns the byte size of the table. size()97 constexpr size_t size() const { return Table.size(); } 98 99 class Iterator 100 : public iterator_facade_base<Iterator, std::forward_iterator_tag, 101 const StringRef> { 102 friend StringTable; 103 104 const StringTable *Table; 105 Offset O; 106 107 // A cache of one value to allow `*` to return a reference. 108 mutable StringRef S; 109 Iterator(const StringTable & Table,Offset O)110 explicit constexpr Iterator(const StringTable &Table, Offset O) 111 : Table(&Table), O(O) {} 112 113 public: 114 constexpr Iterator(const Iterator &RHS) = default; 115 constexpr Iterator(Iterator &&RHS) = default; 116 117 bool operator==(const Iterator &RHS) const { 118 assert(Table == RHS.Table && "Compared iterators for unrelated tables!"); 119 return O == RHS.O; 120 } 121 122 const StringRef &operator*() const { 123 S = (*Table)[O]; 124 return S; 125 } 126 127 Iterator &operator++() { 128 O = O.value() + (*Table)[O].size() + 1; 129 return *this; 130 } 131 }; 132 begin()133 constexpr Iterator begin() const { return Iterator(*this, 0); } end()134 constexpr Iterator end() const { return Iterator(*this, size() - 1); } 135 }; 136 137 } // namespace llvm 138 139 #endif // LLVM_ADT_STRING_TABLE_H 140