xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.h (revision 8bcb0991864975618c09697b1aca10683346d9f0)
1*8bcb0991SDimitry Andric //===--------------------- DfaEmitter.h -----------------------------------===//
2*8bcb0991SDimitry Andric //
3*8bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*8bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*8bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*8bcb0991SDimitry Andric //
7*8bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
8*8bcb0991SDimitry Andric // Defines a generic automaton builder. This takes a set of transitions and
9*8bcb0991SDimitry Andric // states that represent a nondeterministic finite state automaton (NFA) and
10*8bcb0991SDimitry Andric // emits a determinized DFA in a form that include/llvm/Support/Automaton.h can
11*8bcb0991SDimitry Andric // drive.
12*8bcb0991SDimitry Andric //
13*8bcb0991SDimitry Andric // See file llvm/TableGen/Automaton.td for the TableGen API definition.
14*8bcb0991SDimitry Andric //
15*8bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
16*8bcb0991SDimitry Andric 
17*8bcb0991SDimitry Andric #ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H
18*8bcb0991SDimitry Andric #define LLVM_UTILS_TABLEGEN_DFAEMITTER_H
19*8bcb0991SDimitry Andric 
20*8bcb0991SDimitry Andric #include "llvm/ADT/StringRef.h"
21*8bcb0991SDimitry Andric #include "llvm/ADT/UniqueVector.h"
22*8bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h"
23*8bcb0991SDimitry Andric #include "llvm/TableGen/Record.h"
24*8bcb0991SDimitry Andric #include <set>
25*8bcb0991SDimitry Andric #include <unordered_map>
26*8bcb0991SDimitry Andric 
27*8bcb0991SDimitry Andric namespace llvm {
28*8bcb0991SDimitry Andric 
29*8bcb0991SDimitry Andric class raw_ostream;
30*8bcb0991SDimitry Andric /// Construct a deterministic finite state automaton from possible
31*8bcb0991SDimitry Andric /// nondeterministic state and transition data.
32*8bcb0991SDimitry Andric ///
33*8bcb0991SDimitry Andric /// The state type is a 64-bit unsigned integer. The generated automaton is
34*8bcb0991SDimitry Andric /// invariant to the sparsity of the state representation - its size is only
35*8bcb0991SDimitry Andric /// a function of the cardinality of the set of states.
36*8bcb0991SDimitry Andric ///
37*8bcb0991SDimitry Andric /// The inputs to this emitter are considered to define a nondeterministic
38*8bcb0991SDimitry Andric /// finite state automaton (NFA). This is then converted to a DFA during
39*8bcb0991SDimitry Andric /// emission. The emitted tables can be used to by
40*8bcb0991SDimitry Andric /// include/llvm/Support/Automaton.h.
41*8bcb0991SDimitry Andric class DfaEmitter {
42*8bcb0991SDimitry Andric public:
43*8bcb0991SDimitry Andric   // The type of an NFA state. The initial state is always zero.
44*8bcb0991SDimitry Andric   using state_type = uint64_t;
45*8bcb0991SDimitry Andric   // The type of an action.
46*8bcb0991SDimitry Andric   using action_type = uint64_t;
47*8bcb0991SDimitry Andric 
48*8bcb0991SDimitry Andric   DfaEmitter() = default;
49*8bcb0991SDimitry Andric   virtual ~DfaEmitter() = default;
50*8bcb0991SDimitry Andric 
51*8bcb0991SDimitry Andric   void addTransition(state_type From, state_type To, action_type A);
52*8bcb0991SDimitry Andric   void emit(StringRef Name, raw_ostream &OS);
53*8bcb0991SDimitry Andric 
54*8bcb0991SDimitry Andric protected:
55*8bcb0991SDimitry Andric   /// Emit the C++ type of an action to OS.
56*8bcb0991SDimitry Andric   virtual void printActionType(raw_ostream &OS);
57*8bcb0991SDimitry Andric   /// Emit the C++ value of an action A to OS.
58*8bcb0991SDimitry Andric   virtual void printActionValue(action_type A, raw_ostream &OS);
59*8bcb0991SDimitry Andric 
60*8bcb0991SDimitry Andric private:
61*8bcb0991SDimitry Andric   /// The state type of deterministic states. These are only used internally to
62*8bcb0991SDimitry Andric   /// this class. This is an ID into the DfaStates UniqueVector.
63*8bcb0991SDimitry Andric   using dfa_state_type = unsigned;
64*8bcb0991SDimitry Andric 
65*8bcb0991SDimitry Andric   /// The actual representation of a DFA state, which is a union of one or more
66*8bcb0991SDimitry Andric   /// NFA states.
67*8bcb0991SDimitry Andric   using DfaState = SmallVector<state_type, 4>;
68*8bcb0991SDimitry Andric 
69*8bcb0991SDimitry Andric   /// A DFA transition consists of a set of NFA states transitioning to a
70*8bcb0991SDimitry Andric   /// new set of NFA states. The DfaTransitionInfo tracks, for every
71*8bcb0991SDimitry Andric   /// transitioned-from NFA state, a set of valid transitioned-to states.
72*8bcb0991SDimitry Andric   ///
73*8bcb0991SDimitry Andric   /// Emission of this transition relation allows algorithmic determination of
74*8bcb0991SDimitry Andric   /// the possible candidate NFA paths taken under a given input sequence to
75*8bcb0991SDimitry Andric   /// reach a given DFA state.
76*8bcb0991SDimitry Andric   using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>;
77*8bcb0991SDimitry Andric 
78*8bcb0991SDimitry Andric   /// The set of all possible actions.
79*8bcb0991SDimitry Andric   std::set<action_type> Actions;
80*8bcb0991SDimitry Andric 
81*8bcb0991SDimitry Andric   /// The set of nondeterministic transitions. A state-action pair can
82*8bcb0991SDimitry Andric   /// transition to multiple target states.
83*8bcb0991SDimitry Andric   std::map<std::pair<state_type, action_type>, std::vector<state_type>>
84*8bcb0991SDimitry Andric       NfaTransitions;
85*8bcb0991SDimitry Andric   std::set<state_type> NfaStates;
86*8bcb0991SDimitry Andric   unsigned NumNfaTransitions = 0;
87*8bcb0991SDimitry Andric 
88*8bcb0991SDimitry Andric   /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID,
89*8bcb0991SDimitry Andric   /// which is dfa_state_type. Note that because UniqueVector reserves state
90*8bcb0991SDimitry Andric   /// zero, the initial DFA state is always 1.
91*8bcb0991SDimitry Andric   UniqueVector<DfaState> DfaStates;
92*8bcb0991SDimitry Andric   /// The set of deterministic transitions. A state-action pair has only a
93*8bcb0991SDimitry Andric   /// single target state.
94*8bcb0991SDimitry Andric   std::map<std::pair<dfa_state_type, action_type>,
95*8bcb0991SDimitry Andric            std::pair<dfa_state_type, DfaTransitionInfo>>
96*8bcb0991SDimitry Andric       DfaTransitions;
97*8bcb0991SDimitry Andric 
98*8bcb0991SDimitry Andric   /// Visit all NFA states and construct the DFA.
99*8bcb0991SDimitry Andric   void constructDfa();
100*8bcb0991SDimitry Andric   /// Visit a single DFA state and construct all possible transitions to new DFA
101*8bcb0991SDimitry Andric   /// states.
102*8bcb0991SDimitry Andric   void visitDfaState(DfaState DS);
103*8bcb0991SDimitry Andric };
104*8bcb0991SDimitry Andric 
105*8bcb0991SDimitry Andric } // namespace llvm
106*8bcb0991SDimitry Andric 
107*8bcb0991SDimitry Andric #endif
108