xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class can produce a generic deterministic finite state automaton (DFA),
10 // given a set of possible states and transitions.
11 //
12 // The input transitions can be nondeterministic - this class will produce the
13 // deterministic equivalent state machine.
14 //
15 // The generated code can run the DFA and produce an accepted / not accepted
16 // state and also produce, given a sequence of transitions that results in an
17 // accepted state, the sequence of intermediate states. This is useful if the
18 // initial automaton was nondeterministic - it allows mapping back from the DFA
19 // to the NFA.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "DFAEmitter.h"
24 #include "Basic/SequenceToOffsetTable.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/ADT/UniqueVector.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/TableGen/Record.h"
31 #include "llvm/TableGen/TableGenBackend.h"
32 #include <cassert>
33 #include <cstdint>
34 #include <deque>
35 #include <map>
36 #include <set>
37 #include <string>
38 #include <variant>
39 #include <vector>
40 
41 #define DEBUG_TYPE "dfa-emitter"
42 
43 using namespace llvm;
44 
45 //===----------------------------------------------------------------------===//
46 // DfaEmitter implementation. This is independent of the GenAutomaton backend.
47 //===----------------------------------------------------------------------===//
48 
addTransition(state_type From,state_type To,action_type A)49 void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {
50   Actions.insert(A);
51   NfaStates.insert(From);
52   NfaStates.insert(To);
53   NfaTransitions[{From, A}].push_back(To);
54   ++NumNfaTransitions;
55 }
56 
visitDfaState(const DfaState & DS)57 void DfaEmitter::visitDfaState(const DfaState &DS) {
58   // For every possible action...
59   auto FromId = DfaStates.idFor(DS);
60   for (action_type A : Actions) {
61     DfaState NewStates;
62     DfaTransitionInfo TI;
63     // For every represented state, word pair in the original NFA...
64     for (state_type FromState : DS) {
65       // If this action is possible from this state add the transitioned-to
66       // states to NewStates.
67       auto I = NfaTransitions.find({FromState, A});
68       if (I == NfaTransitions.end())
69         continue;
70       for (state_type &ToState : I->second) {
71         NewStates.push_back(ToState);
72         TI.emplace_back(FromState, ToState);
73       }
74     }
75     if (NewStates.empty())
76       continue;
77     // Sort and unique.
78     sort(NewStates);
79     NewStates.erase(llvm::unique(NewStates), NewStates.end());
80     sort(TI);
81     TI.erase(llvm::unique(TI), TI.end());
82     unsigned ToId = DfaStates.insert(NewStates);
83     DfaTransitions.emplace(std::pair(FromId, A), std::pair(ToId, TI));
84   }
85 }
86 
constructDfa()87 void DfaEmitter::constructDfa() {
88   DfaState Initial(1, /*NFA initial state=*/0);
89   DfaStates.insert(Initial);
90 
91   // Note that UniqueVector starts indices at 1, not zero.
92   unsigned DfaStateId = 1;
93   while (DfaStateId <= DfaStates.size()) {
94     DfaState S = DfaStates[DfaStateId];
95     visitDfaState(S);
96     DfaStateId++;
97   }
98 }
99 
emit(StringRef Name,raw_ostream & OS)100 void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
101   constructDfa();
102 
103   OS << "// Input NFA has " << NfaStates.size() << " states with "
104      << NumNfaTransitions << " transitions.\n";
105   OS << "// Generated DFA has " << DfaStates.size() << " states with "
106      << DfaTransitions.size() << " transitions.\n\n";
107 
108   // Implementation note: We don't bake a simple std::pair<> here as it requires
109   // significantly more effort to parse. A simple test with a large array of
110   // struct-pairs (N=100000) took clang-10 6s to parse. The same array of
111   // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to
112   // define the pair type.
113   //
114   // FIXME: It may make sense to emit these as ULEB sequences instead of
115   // pairs of uint64_t.
116   OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n";
117   OS << "// transition implies a set of NFA transitions. These are referred\n";
118   OS << "// to by index in " << Name << "Transitions[].\n";
119 
120   SequenceToOffsetTable<DfaTransitionInfo> Table;
121   for (auto &T : DfaTransitions)
122     Table.add(T.second.second);
123   Table.layout();
124   OS << "const std::array<NfaStatePair, " << Table.size() << "> " << Name
125      << "TransitionInfo = {{\n";
126   Table.emit(OS, [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
127     OS << "{" << P.first << ", " << P.second << "}";
128   });
129 
130   OS << "}};\n\n";
131 
132   OS << "// A transition in the generated " << Name << " DFA.\n";
133   OS << "struct " << Name << "Transition {\n";
134   OS << "  unsigned FromDfaState; // The transitioned-from DFA state.\n";
135   OS << "  ";
136   printActionType(OS);
137   OS << " Action;       // The input symbol that causes this transition.\n";
138   OS << "  unsigned ToDfaState;   // The transitioned-to DFA state.\n";
139   OS << "  unsigned InfoIdx;      // Start index into " << Name
140      << "TransitionInfo.\n";
141   OS << "};\n\n";
142 
143   OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
144   OS << "// The initial state is 1, not zero.\n";
145   OS << "const std::array<" << Name << "Transition, " << DfaTransitions.size()
146      << "> " << Name << "Transitions = {{\n";
147   for (auto &KV : DfaTransitions) {
148     dfa_state_type From = KV.first.first;
149     dfa_state_type To = KV.second.first;
150     action_type A = KV.first.second;
151     unsigned InfoIdx = Table.get(KV.second.second);
152     OS << "  {" << From << ", ";
153     printActionValue(A, OS);
154     OS << ", " << To << ", " << InfoIdx << "},\n";
155   }
156   OS << "\n}};\n\n";
157 }
158 
printActionType(raw_ostream & OS)159 void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; }
160 
printActionValue(action_type A,raw_ostream & OS)161 void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }
162 
163 //===----------------------------------------------------------------------===//
164 // AutomatonEmitter implementation
165 //===----------------------------------------------------------------------===//
166 
167 namespace {
168 
169 using Action = std::variant<const Record *, unsigned, std::string>;
170 using ActionTuple = std::vector<Action>;
171 class Automaton;
172 
173 class Transition {
174   uint64_t NewState;
175   // The tuple of actions that causes this transition.
176   ActionTuple Actions;
177   // The types of the actions; this is the same across all transitions.
178   SmallVector<std::string, 4> Types;
179 
180 public:
181   Transition(const Record *R, Automaton *Parent);
getActions()182   const ActionTuple &getActions() { return Actions; }
getTypes()183   SmallVector<std::string, 4> getTypes() { return Types; }
184 
185   bool canTransitionFrom(uint64_t State);
186   uint64_t transitionFrom(uint64_t State);
187 };
188 
189 class Automaton {
190   const RecordKeeper &Records;
191   const Record *R;
192   std::vector<Transition> Transitions;
193   /// All possible action tuples, uniqued.
194   UniqueVector<ActionTuple> Actions;
195   /// The fields within each Transition object to find the action symbols.
196   std::vector<StringRef> ActionSymbolFields;
197 
198 public:
199   Automaton(const RecordKeeper &Records, const Record *R);
200   void emit(raw_ostream &OS);
201 
getActionSymbolFields()202   ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; }
203   /// If the type of action A has been overridden (there exists a field
204   /// "TypeOf_A") return that, otherwise return the empty string.
205   StringRef getActionSymbolType(StringRef A);
206 };
207 
208 class AutomatonEmitter {
209   const RecordKeeper &Records;
210 
211 public:
AutomatonEmitter(const RecordKeeper & R)212   AutomatonEmitter(const RecordKeeper &R) : Records(R) {}
213   void run(raw_ostream &OS);
214 };
215 
216 /// A DfaEmitter implementation that can print our variant action type.
217 class CustomDfaEmitter : public DfaEmitter {
218   const UniqueVector<ActionTuple> &Actions;
219   std::string TypeName;
220 
221 public:
CustomDfaEmitter(const UniqueVector<ActionTuple> & Actions,StringRef TypeName)222   CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName)
223       : Actions(Actions), TypeName(TypeName) {}
224 
225   void printActionType(raw_ostream &OS) override;
226   void printActionValue(action_type A, raw_ostream &OS) override;
227 };
228 } // namespace
229 
run(raw_ostream & OS)230 void AutomatonEmitter::run(raw_ostream &OS) {
231   for (const Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) {
232     Automaton A(Records, R);
233     OS << "#ifdef GET_" << R->getName() << "_DECL\n";
234     A.emit(OS);
235     OS << "#endif  // GET_" << R->getName() << "_DECL\n";
236   }
237 }
238 
Automaton(const RecordKeeper & Records,const Record * R)239 Automaton::Automaton(const RecordKeeper &Records, const Record *R)
240     : Records(Records), R(R) {
241   LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n");
242   ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields");
243 }
244 
emit(raw_ostream & OS)245 void Automaton::emit(raw_ostream &OS) {
246   StringRef TransitionClass = R->getValueAsString("TransitionClass");
247   for (const Record *T : Records.getAllDerivedDefinitions(TransitionClass)) {
248     assert(T->isSubClassOf("Transition"));
249     Transitions.emplace_back(T, this);
250     Actions.insert(Transitions.back().getActions());
251   }
252 
253   LLVM_DEBUG(dbgs() << "  Action alphabet cardinality: " << Actions.size()
254                     << "\n");
255   LLVM_DEBUG(dbgs() << "  Each state has " << Transitions.size()
256                     << " potential transitions.\n");
257 
258   StringRef Name = R->getName();
259 
260   CustomDfaEmitter Emitter(Actions, Name.str() + "Action");
261   // Starting from the initial state, build up a list of possible states and
262   // transitions.
263   std::deque<uint64_t> Worklist(1, 0);
264   std::set<uint64_t> SeenStates;
265   unsigned NumTransitions = 0;
266   SeenStates.insert(Worklist.front());
267   while (!Worklist.empty()) {
268     uint64_t State = Worklist.front();
269     Worklist.pop_front();
270     for (Transition &T : Transitions) {
271       if (!T.canTransitionFrom(State))
272         continue;
273       uint64_t NewState = T.transitionFrom(State);
274       if (SeenStates.emplace(NewState).second)
275         Worklist.emplace_back(NewState);
276       ++NumTransitions;
277       Emitter.addTransition(State, NewState, Actions.idFor(T.getActions()));
278     }
279   }
280   LLVM_DEBUG(dbgs() << "  NFA automaton has " << SeenStates.size()
281                     << " states with " << NumTransitions << " transitions.\n");
282   (void)NumTransitions;
283 
284   const auto &ActionTypes = Transitions.back().getTypes();
285   OS << "// The type of an action in the " << Name << " automaton.\n";
286   if (ActionTypes.size() == 1) {
287     OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n";
288   } else {
289     OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ")
290        << ">;\n";
291   }
292   OS << "\n";
293 
294   Emitter.emit(Name, OS);
295 }
296 
getActionSymbolType(StringRef A)297 StringRef Automaton::getActionSymbolType(StringRef A) {
298   Twine Ty = "TypeOf_" + A;
299   if (!R->getValue(Ty.str()))
300     return "";
301   return R->getValueAsString(Ty.str());
302 }
303 
Transition(const Record * R,Automaton * Parent)304 Transition::Transition(const Record *R, Automaton *Parent) {
305   const BitsInit *NewStateInit = R->getValueAsBitsInit("NewState");
306   NewState = 0;
307   assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 &&
308          "State cannot be represented in 64 bits!");
309   for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) {
310     if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) {
311       if (Bit->getValue())
312         NewState |= 1ULL << I;
313     }
314   }
315 
316   for (StringRef A : Parent->getActionSymbolFields()) {
317     const RecordVal *SymbolV = R->getValue(A);
318     if (const auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) {
319       Actions.emplace_back(R->getValueAsDef(A));
320       Types.emplace_back(Ty->getAsString());
321     } else if (isa<IntRecTy>(SymbolV->getType())) {
322       Actions.emplace_back(static_cast<unsigned>(R->getValueAsInt(A)));
323       Types.emplace_back("unsigned");
324     } else if (isa<StringRecTy>(SymbolV->getType())) {
325       Actions.emplace_back(R->getValueAsString(A).str());
326       Types.emplace_back("std::string");
327     } else {
328       report_fatal_error("Unhandled symbol type!");
329     }
330 
331     StringRef TypeOverride = Parent->getActionSymbolType(A);
332     if (!TypeOverride.empty())
333       Types.back() = TypeOverride.str();
334   }
335 }
336 
canTransitionFrom(uint64_t State)337 bool Transition::canTransitionFrom(uint64_t State) {
338   if ((State & NewState) == 0)
339     // The bits we want to set are not set;
340     return true;
341   return false;
342 }
343 
transitionFrom(uint64_t State)344 uint64_t Transition::transitionFrom(uint64_t State) { return State | NewState; }
345 
printActionType(raw_ostream & OS)346 void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; }
347 
printActionValue(action_type A,raw_ostream & OS)348 void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
349   const ActionTuple &AT = Actions[A];
350   if (AT.size() > 1)
351     OS << "{";
352   ListSeparator LS;
353   for (const auto &SingleAction : AT) {
354     OS << LS;
355     if (const auto *R = std::get_if<const Record *>(&SingleAction))
356       OS << (*R)->getName();
357     else if (const auto *S = std::get_if<std::string>(&SingleAction))
358       OS << '"' << *S << '"';
359     else
360       OS << std::get<unsigned>(SingleAction);
361   }
362   if (AT.size() > 1)
363     OS << "}";
364 }
365 
// Registers AutomatonEmitter as the backend for the "gen-automata" option.
static TableGen::Emitter::OptClass<AutomatonEmitter>
    X("gen-automata", "Generate generic automata");
368