xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // SequenceToOffsetTable can be used to emit a number of sequences as one big
10 // array. Uses the same memory when a sequence is a suffix of another.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
15 #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
16 
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include "llvm/TableGen/Main.h"
20 #include <algorithm>
21 #include <cassert>
22 #include <functional>
23 #include <map>
24 
25 namespace llvm {
26 
printChar(raw_ostream & OS,char C)27 inline void printChar(raw_ostream &OS, char C) {
28   unsigned char UC(C);
29   if (isAlnum(UC) || isPunct(UC)) {
30     OS << '\'';
31     if (C == '\\' || C == '\'')
32       OS << '\\';
33     OS << C << '\'';
34   } else {
35     OS << unsigned(UC);
36   }
37 }
38 
39 /// SequenceToOffsetTable - Collect a number of terminated sequences of T.
40 /// Compute the layout of a table that contains all the sequences, possibly by
41 /// reusing entries.
42 ///
43 /// @tparam SeqT The sequence container. (vector or string).
44 /// @tparam Less A stable comparator for SeqT elements.
45 template <typename SeqT, typename Less = std::less<typename SeqT::value_type>>
46 class SequenceToOffsetTable {
47   typedef typename SeqT::value_type ElemT;
48 
49   // Define a comparator for SeqT that sorts a suffix immediately before a
50   // sequence with that suffix.
51   struct SeqLess {
52     Less L;
operatorSeqLess53     bool operator()(const SeqT &A, const SeqT &B) const {
54       return std::lexicographical_compare(A.rbegin(), A.rend(), B.rbegin(),
55                                           B.rend(), L);
56     }
57   };
58 
59   // Keep sequences ordered according to SeqLess so suffixes are easy to find.
60   // Map each sequence to its offset in the table.
61   typedef std::map<SeqT, unsigned, SeqLess> SeqMap;
62 
63   // Sequences added so far, with suffixes removed.
64   SeqMap Seqs;
65 
66   // Terminator element to be appended to each added sequence.
67   std::optional<ElemT> Terminator;
68 
69   // True if `layout` method was called.
70   bool IsLaidOut = false;
71 
72   // Entries in the final table, or 0 before layout was called.
73   unsigned Entries = 0;
74 
75   // isSuffix - Returns true if A is a suffix of B.
isSuffix(const SeqT & A,const SeqT & B)76   static bool isSuffix(const SeqT &A, const SeqT &B) {
77     return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin());
78   }
79 
80 public:
81   explicit SequenceToOffsetTable(std::optional<ElemT> Terminator = ElemT())
Terminator(Terminator)82       : Terminator(Terminator) {}
83 
84   /// add - Add a sequence to the table.
85   /// This must be called before layout().
add(const SeqT & Seq)86   void add(const SeqT &Seq) {
87     assert(!IsLaidOut && "Cannot call add() after layout()");
88     typename SeqMap::iterator I = Seqs.lower_bound(Seq);
89 
90     // If SeqMap contains a sequence that has Seq as a suffix, I will be
91     // pointing to it.
92     if (I != Seqs.end() && isSuffix(Seq, I->first))
93       return;
94 
95     I = Seqs.insert(I, {Seq, 0u});
96 
97     // The entry before I may be a suffix of Seq that can now be erased.
98     if (I != Seqs.begin() && isSuffix((--I)->first, Seq))
99       Seqs.erase(I);
100   }
101 
empty()102   bool empty() const { return Seqs.empty(); }
103 
size()104   unsigned size() const {
105     assert(IsLaidOut && "Call layout() before size()");
106     return Entries;
107   }
108 
109   /// layout - Computes the final table layout.
layout()110   void layout() {
111     assert(!IsLaidOut && "Can only call layout() once");
112     IsLaidOut = true;
113 
114     // Lay out the table in Seqs iteration order.
115     for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
116          ++I) {
117       I->second = Entries;
118       // Include space for a terminator.
119       Entries += I->first.size() + Terminator.has_value();
120     }
121   }
122 
123   /// get - Returns the offset of Seq in the final table.
get(const SeqT & Seq)124   unsigned get(const SeqT &Seq) const {
125     assert(IsLaidOut && "Call layout() before get()");
126     typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
127     assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
128            "get() called with sequence that wasn't added first");
129     return I->second + (I->first.size() - Seq.size());
130   }
131 
132   /// `emitStringLiteralDef` - Print out the table as the body of an array
133   /// initializer, where each element is a C string literal terminated by
134   /// `\0`. Falls back to emitting a comma-separated integer list if
135   /// `EmitLongStrLiterals` is false
emitStringLiteralDef(raw_ostream & OS,const Twine & Decl)136   void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const {
137     assert(IsLaidOut && "Call layout() before emitStringLiteralDef()");
138     if (!EmitLongStrLiterals) {
139       OS << Decl << " = {\n";
140       emit(OS, printChar);
141       OS << "  0\n};\n\n";
142       return;
143     }
144 
145     OS << "\n#ifdef __GNUC__\n"
146        << "#pragma GCC diagnostic push\n"
147        << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
148        << "#endif\n"
149        << Decl << " = {\n";
150     for (const auto &[Seq, Offset] : Seqs) {
151       OS << "  /* " << Offset << " */ \"";
152       OS.write_escaped(Seq);
153       if (Terminator)
154         OS.write_escaped(StringRef(&*Terminator, 1));
155       OS << "\"\n";
156     }
157     OS << "};\n"
158        << "#ifdef __GNUC__\n"
159        << "#pragma GCC diagnostic pop\n"
160        << "#endif\n\n";
161   }
162 
163   /// emit - Print out the table as the body of an array initializer.
164   /// Use the Print function to print elements.
emit(raw_ostream & OS,void (* Print)(raw_ostream &,ElemT))165   void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const {
166     assert(IsLaidOut && "Call layout() before emit()");
167     for (const auto &[Seq, Offset] : Seqs) {
168       OS << "  /* " << Offset << " */ ";
169       for (const ElemT &Element : Seq) {
170         Print(OS, Element);
171         OS << ", ";
172       }
173       if (Terminator) {
174         Print(OS, *Terminator);
175         OS << ',';
176       }
177       OS << '\n';
178     }
179 
180     // Print a dummy element if the array would be empty otherwise.
181     if (!Entries) {
182       OS << "  /* dummy */ ";
183       Print(OS, ElemT());
184       OS << '\n';
185     }
186   }
187 };
188 
189 } // end namespace llvm
190 
191 #endif // LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
192