xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ADT/StringTable.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- StringTable.h - Table of strings tracked by offset ----------C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_STRING_TABLE_H
10 #define LLVM_ADT_STRING_TABLE_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/ADT/iterator.h"
14 #include <iterator>
15 #include <limits>
16 
17 namespace llvm {
18 
19 /// A table of densely packed, null-terminated strings indexed by offset.
20 ///
21 /// This table abstracts a densely concatenated list of null-terminated strings,
22 /// each of which can be referenced using an offset into the table.
23 ///
24 /// This requires and ensures that the string at offset 0 is also the empty
25 /// string. This helps allow zero-initialized offsets form empty strings and
26 /// avoids non-zero initialization when using a string literal pointer would
27 /// allow a null pointer.
28 ///
29 /// The primary use case is having a single global string literal for the table
30 /// contents, and offsets into it in other global data structures to avoid
31 /// dynamic relocations of individual string literal pointers in those global
32 /// data structures.
33 class StringTable {
34   StringRef Table;
35 
36 public:
37   // An offset into one of these packed string tables, used to select a string
38   // within the table.
39   //
40   // Typically these are created by TableGen or other code generator from
41   // computed offsets, and it just wraps that integer into a type until it is
42   // used with the relevant table.
43   //
44   // We also ensure that the empty string is at offset zero and default
45   // constructing this class gives you an offset of zero. This makes default
46   // constructing this type work similarly (after indexing the table) to default
47   // constructing a `StringRef`.
48   class Offset {
49     // Note that we ensure the empty string is at offset zero.
50     unsigned Value = 0;
51 
52   public:
53     constexpr Offset() = default;
Offset(unsigned Value)54     constexpr Offset(unsigned Value) : Value(Value) {}
55 
56     friend constexpr bool operator==(const Offset &LHS, const Offset &RHS) {
57       return LHS.Value == RHS.Value;
58     }
59 
60     friend constexpr bool operator!=(const Offset &LHS, const Offset &RHS) {
61       return LHS.Value != RHS.Value;
62     }
63 
value()64     constexpr unsigned value() const { return Value; }
65   };
66 
67   // We directly handle string literals with a templated converting constructor
68   // because we *don't* want to do `strlen` on them -- we fully expect null
69   // bytes in this input. This is somewhat the opposite of how `StringLiteral`
70   // works.
71   template <size_t N>
StringTable(const char (& RawTable)[N])72   constexpr StringTable(const char (&RawTable)[N]) : Table(RawTable, N) {
73     static_assert(N <= std::numeric_limits<unsigned>::max(),
74                   "We only support table sizes that can be indexed by an "
75                   "`unsigned` offset.");
76 
77     // Note that we can only use `empty`, `data`, and `size` in these asserts to
78     // support `constexpr`.
79     assert(!Table.empty() && "Requires at least a valid empty string.");
80     assert(Table.data()[0] == '\0' && "Offset zero must be the empty string.");
81     // Regardless of how many strings are in the table, the last one should also
82     // be null terminated. This also ensures that computing `strlen` on the
83     // strings can't accidentally run past the end of the table.
84     assert(Table.data()[Table.size() - 1] == '\0' &&
85            "Last byte must be a null byte.");
86   }
87 
88   // Get a string from the table starting with the provided offset. The returned
89   // `StringRef` is in fact null terminated, and so can be converted safely to a
90   // C-string if necessary for a system API.
91   constexpr StringRef operator[](Offset O) const {
92     assert(O.value() < Table.size() && "Out of bounds offset!");
93     return Table.data() + O.value();
94   }
95 
96   /// Returns the byte size of the table.
size()97   constexpr size_t size() const { return Table.size(); }
98 
99   class Iterator
100       : public iterator_facade_base<Iterator, std::forward_iterator_tag,
101                                     const StringRef> {
102     friend StringTable;
103 
104     const StringTable *Table;
105     Offset O;
106 
107     // A cache of one value to allow `*` to return a reference.
108     mutable StringRef S;
109 
Iterator(const StringTable & Table,Offset O)110     explicit constexpr Iterator(const StringTable &Table, Offset O)
111         : Table(&Table), O(O) {}
112 
113   public:
114     constexpr Iterator(const Iterator &RHS) = default;
115     constexpr Iterator(Iterator &&RHS) = default;
116 
117     bool operator==(const Iterator &RHS) const {
118       assert(Table == RHS.Table && "Compared iterators for unrelated tables!");
119       return O == RHS.O;
120     }
121 
122     const StringRef &operator*() const {
123       S = (*Table)[O];
124       return S;
125     }
126 
127     Iterator &operator++() {
128       O = O.value() + (*Table)[O].size() + 1;
129       return *this;
130     }
131   };
132 
begin()133   constexpr Iterator begin() const { return Iterator(*this, 0); }
end()134   constexpr Iterator end() const { return Iterator(*this, size() - 1); }
135 };
136 
137 } // namespace llvm
138 
139 #endif // LLVM_ADT_STRING_TABLE_H
140