xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp (revision 85868e8a1daeaae7a0e48effb2ea2310ae3b02c6)
1 //===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class can produce a generic deterministic finite state automaton (DFA),
10 // given a set of possible states and transitions.
11 //
12 // The input transitions can be nondeterministic - this class will produce the
13 // deterministic equivalent state machine.
14 //
15 // The generated code can run the DFA and produce an accepted / not accepted
16 // state and also produce, given a sequence of transitions that results in an
17 // accepted state, the sequence of intermediate states. This is useful if the
18 // initial automaton was nondeterministic - it allows mapping back from the DFA
19 // to the NFA.
20 //
21 //===----------------------------------------------------------------------===//
22 #define DEBUG_TYPE "dfa-emitter"
23 
24 #include "DFAEmitter.h"
25 #include "CodeGenTarget.h"
26 #include "SequenceToOffsetTable.h"
27 #include "TableGenBackends.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/UniqueVector.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <map>
38 #include <set>
39 #include <string>
40 #include <vector>
41 
42 using namespace llvm;
43 
44 //===----------------------------------------------------------------------===//
45 // DfaEmitter implementation. This is independent of the GenAutomaton backend.
46 //===----------------------------------------------------------------------===//
47 
48 void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {
49   Actions.insert(A);
50   NfaStates.insert(From);
51   NfaStates.insert(To);
52   NfaTransitions[{From, A}].push_back(To);
53   ++NumNfaTransitions;
54 }
55 
56 void DfaEmitter::visitDfaState(DfaState DS) {
57   // For every possible action...
58   auto FromId = DfaStates.idFor(DS);
59   for (action_type A : Actions) {
60     DfaState NewStates;
61     DfaTransitionInfo TI;
62     // For every represented state, word pair in the original NFA...
63     for (state_type &FromState : DS) {
64       // If this action is possible from this state add the transitioned-to
65       // states to NewStates.
66       auto I = NfaTransitions.find({FromState, A});
67       if (I == NfaTransitions.end())
68         continue;
69       for (state_type &ToState : I->second) {
70         NewStates.push_back(ToState);
71         TI.emplace_back(FromState, ToState);
72       }
73     }
74     if (NewStates.empty())
75       continue;
76     // Sort and unique.
77     sort(NewStates);
78     NewStates.erase(std::unique(NewStates.begin(), NewStates.end()),
79                     NewStates.end());
80     sort(TI);
81     TI.erase(std::unique(TI.begin(), TI.end()), TI.end());
82     unsigned ToId = DfaStates.insert(NewStates);
83     DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI));
84   }
85 }
86 
87 void DfaEmitter::constructDfa() {
88   DfaState Initial(1, /*NFA initial state=*/0);
89   DfaStates.insert(Initial);
90 
91   // Note that UniqueVector starts indices at 1, not zero.
92   unsigned DfaStateId = 1;
93   while (DfaStateId <= DfaStates.size())
94     visitDfaState(DfaStates[DfaStateId++]);
95 }
96 
97 void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
98   constructDfa();
99 
100   OS << "// Input NFA has " << NfaStates.size() << " states with "
101      << NumNfaTransitions << " transitions.\n";
102   OS << "// Generated DFA has " << DfaStates.size() << " states with "
103      << DfaTransitions.size() << " transitions.\n\n";
104 
105   // Implementation note: We don't bake a simple std::pair<> here as it requires
106   // significantly more effort to parse. A simple test with a large array of
107   // struct-pairs (N=100000) took clang-10 6s to parse. The same array of
108   // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to
109   // define the pair type.
110   //
111   // FIXME: It may make sense to emit these as ULEB sequences instead of
112   // pairs of uint64_t.
113   OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n";
114   OS << "// transition implies a set of NFA transitions. These are referred\n";
115   OS << "// to by index in " << Name << "Transitions[].\n";
116 
117   SequenceToOffsetTable<DfaTransitionInfo> Table;
118   std::map<DfaTransitionInfo, unsigned> EmittedIndices;
119   for (auto &T : DfaTransitions)
120     Table.add(T.second.second);
121   Table.layout();
122   OS << "std::array<NfaStatePair, " << Table.size() << "> " << Name
123      << "TransitionInfo = {{\n";
124   Table.emit(
125       OS,
126       [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
127         OS << "{" << P.first << ", " << P.second << "}";
128       },
129       "{0ULL, 0ULL}");
130 
131   OS << "}};\n\n";
132 
133   OS << "// A transition in the generated " << Name << " DFA.\n";
134   OS << "struct " << Name << "Transition {\n";
135   OS << "  unsigned FromDfaState; // The transitioned-from DFA state.\n";
136   OS << "  ";
137   printActionType(OS);
138   OS << " Action;       // The input symbol that causes this transition.\n";
139   OS << "  unsigned ToDfaState;   // The transitioned-to DFA state.\n";
140   OS << "  unsigned InfoIdx;      // Start index into " << Name
141      << "TransitionInfo.\n";
142   OS << "};\n\n";
143 
144   OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
145   OS << "// The initial state is 1, not zero.\n";
146   OS << "std::array<" << Name << "Transition, " << DfaTransitions.size() << "> "
147      << Name << "Transitions = {{\n";
148   for (auto &KV : DfaTransitions) {
149     dfa_state_type From = KV.first.first;
150     dfa_state_type To = KV.second.first;
151     action_type A = KV.first.second;
152     unsigned InfoIdx = Table.get(KV.second.second);
153     OS << "  {" << From << ", ";
154     printActionValue(A, OS);
155     OS << ", " << To << ", " << InfoIdx << "},\n";
156   }
157   OS << "\n}};\n\n";
158 }
159 
160 void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; }
161 
162 void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }
163 
164 //===----------------------------------------------------------------------===//
165 // AutomatonEmitter implementation
166 //===----------------------------------------------------------------------===//
167 
168 namespace {
169 // FIXME: This entire discriminated union could be removed with c++17:
170 //   using Action = std::variant<Record *, unsigned, std::string>;
171 struct Action {
172   Record *R = nullptr;
173   unsigned I = 0;
174   std::string S = nullptr;
175 
176   Action() = default;
177   Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {}
178 
179   void print(raw_ostream &OS) const {
180     if (R)
181       OS << R->getName();
182     else if (!S.empty())
183       OS << '"' << S << '"';
184     else
185       OS << I;
186   }
187   bool operator<(const Action &Other) const {
188     return std::make_tuple(R, I, S) <
189            std::make_tuple(Other.R, Other.I, Other.S);
190   }
191 };
192 
193 using ActionTuple = std::vector<Action>;
194 class Automaton;
195 
196 class Transition {
197   uint64_t NewState;
198   // The tuple of actions that causes this transition.
199   ActionTuple Actions;
200   // The types of the actions; this is the same across all transitions.
201   SmallVector<std::string, 4> Types;
202 
203 public:
204   Transition(Record *R, Automaton *Parent);
205   const ActionTuple &getActions() { return Actions; }
206   SmallVector<std::string, 4> getTypes() { return Types; }
207 
208   bool canTransitionFrom(uint64_t State);
209   uint64_t transitionFrom(uint64_t State);
210 };
211 
212 class Automaton {
213   RecordKeeper &Records;
214   Record *R;
215   std::vector<Transition> Transitions;
216   /// All possible action tuples, uniqued.
217   UniqueVector<ActionTuple> Actions;
218   /// The fields within each Transition object to find the action symbols.
219   std::vector<StringRef> ActionSymbolFields;
220 
221 public:
222   Automaton(RecordKeeper &Records, Record *R);
223   void emit(raw_ostream &OS);
224 
225   ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; }
226   /// If the type of action A has been overridden (there exists a field
227   /// "TypeOf_A") return that, otherwise return the empty string.
228   StringRef getActionSymbolType(StringRef A);
229 };
230 
231 class AutomatonEmitter {
232   RecordKeeper &Records;
233 
234 public:
235   AutomatonEmitter(RecordKeeper &R) : Records(R) {}
236   void run(raw_ostream &OS);
237 };
238 
239 /// A DfaEmitter implementation that can print our variant action type.
240 class CustomDfaEmitter : public DfaEmitter {
241   const UniqueVector<ActionTuple> &Actions;
242   std::string TypeName;
243 
244 public:
245   CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName)
246       : Actions(Actions), TypeName(TypeName) {}
247 
248   void printActionType(raw_ostream &OS) override;
249   void printActionValue(action_type A, raw_ostream &OS) override;
250 };
251 } // namespace
252 
253 void AutomatonEmitter::run(raw_ostream &OS) {
254   for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) {
255     Automaton A(Records, R);
256     OS << "#ifdef GET_" << R->getName() << "_DECL\n";
257     A.emit(OS);
258     OS << "#endif  // GET_" << R->getName() << "_DECL\n";
259   }
260 }
261 
262 Automaton::Automaton(RecordKeeper &Records, Record *R)
263     : Records(Records), R(R) {
264   LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n");
265   ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields");
266 }
267 
268 void Automaton::emit(raw_ostream &OS) {
269   StringRef TransitionClass = R->getValueAsString("TransitionClass");
270   for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) {
271     assert(T->isSubClassOf("Transition"));
272     Transitions.emplace_back(T, this);
273     Actions.insert(Transitions.back().getActions());
274   }
275 
276   LLVM_DEBUG(dbgs() << "  Action alphabet cardinality: " << Actions.size()
277                     << "\n");
278   LLVM_DEBUG(dbgs() << "  Each state has " << Transitions.size()
279                     << " potential transitions.\n");
280 
281   StringRef Name = R->getName();
282 
283   CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action");
284   // Starting from the initial state, build up a list of possible states and
285   // transitions.
286   std::deque<uint64_t> Worklist(1, 0);
287   std::set<uint64_t> SeenStates;
288   unsigned NumTransitions = 0;
289   SeenStates.insert(Worklist.front());
290   while (!Worklist.empty()) {
291     uint64_t State = Worklist.front();
292     Worklist.pop_front();
293     for (Transition &T : Transitions) {
294       if (!T.canTransitionFrom(State))
295         continue;
296       uint64_t NewState = T.transitionFrom(State);
297       if (SeenStates.emplace(NewState).second)
298         Worklist.emplace_back(NewState);
299       ++NumTransitions;
300       Emitter.addTransition(State, NewState, Actions.idFor(T.getActions()));
301     }
302   }
303   LLVM_DEBUG(dbgs() << "  NFA automaton has " << SeenStates.size()
304                     << " states with " << NumTransitions << " transitions.\n");
305 
306   const auto &ActionTypes = Transitions.back().getTypes();
307   OS << "// The type of an action in the " << Name << " automaton.\n";
308   if (ActionTypes.size() == 1) {
309     OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n";
310   } else {
311     OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ")
312        << ">;\n";
313   }
314   OS << "\n";
315 
316   Emitter.emit(Name, OS);
317 }
318 
319 StringRef Automaton::getActionSymbolType(StringRef A) {
320   Twine Ty = "TypeOf_" + A;
321   if (!R->getValue(Ty.str()))
322     return "";
323   return R->getValueAsString(Ty.str());
324 }
325 
326 Transition::Transition(Record *R, Automaton *Parent) {
327   BitsInit *NewStateInit = R->getValueAsBitsInit("NewState");
328   NewState = 0;
329   assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 &&
330          "State cannot be represented in 64 bits!");
331   for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) {
332     if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) {
333       if (Bit->getValue())
334         NewState |= 1ULL << I;
335     }
336   }
337 
338   for (StringRef A : Parent->getActionSymbolFields()) {
339     RecordVal *SymbolV = R->getValue(A);
340     if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) {
341       Actions.emplace_back(R->getValueAsDef(A), 0, "");
342       Types.emplace_back(Ty->getAsString());
343     } else if (isa<IntRecTy>(SymbolV->getType())) {
344       Actions.emplace_back(nullptr, R->getValueAsInt(A), "");
345       Types.emplace_back("unsigned");
346     } else if (isa<StringRecTy>(SymbolV->getType()) ||
347                isa<CodeRecTy>(SymbolV->getType())) {
348       Actions.emplace_back(nullptr, 0, R->getValueAsString(A));
349       Types.emplace_back("std::string");
350     } else {
351       report_fatal_error("Unhandled symbol type!");
352     }
353 
354     StringRef TypeOverride = Parent->getActionSymbolType(A);
355     if (!TypeOverride.empty())
356       Types.back() = TypeOverride;
357   }
358 }
359 
360 bool Transition::canTransitionFrom(uint64_t State) {
361   if ((State & NewState) == 0)
362     // The bits we want to set are not set;
363     return true;
364   return false;
365 }
366 
367 uint64_t Transition::transitionFrom(uint64_t State) {
368   return State | NewState;
369 }
370 
371 void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; }
372 
373 void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
374   const ActionTuple &AT = Actions[A];
375   if (AT.size() > 1)
376     OS << "std::make_tuple(";
377   bool First = true;
378   for (const auto &SingleAction : AT) {
379     if (!First)
380       OS << ", ";
381     First = false;
382     SingleAction.print(OS);
383   }
384   if (AT.size() > 1)
385     OS << ")";
386 }
387 
388 namespace llvm {
389 
390 void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) {
391   AutomatonEmitter(RK).run(OS);
392 }
393 
394 } // namespace llvm
395