1*8bcb0991SDimitry Andric //===--------------------- DfaEmitter.h -----------------------------------===// 2*8bcb0991SDimitry Andric // 3*8bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*8bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*8bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*8bcb0991SDimitry Andric // 7*8bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 8*8bcb0991SDimitry Andric // Defines a generic automaton builder. This takes a set of transitions and 9*8bcb0991SDimitry Andric // states that represent a nondeterministic finite state automaton (NFA) and 10*8bcb0991SDimitry Andric // emits a determinized DFA in a form that include/llvm/Support/Automaton.h can 11*8bcb0991SDimitry Andric // drive. 12*8bcb0991SDimitry Andric // 13*8bcb0991SDimitry Andric // See file llvm/TableGen/Automaton.td for the TableGen API definition. 14*8bcb0991SDimitry Andric // 15*8bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 16*8bcb0991SDimitry Andric 17*8bcb0991SDimitry Andric #ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H 18*8bcb0991SDimitry Andric #define LLVM_UTILS_TABLEGEN_DFAEMITTER_H 19*8bcb0991SDimitry Andric 20*8bcb0991SDimitry Andric #include "llvm/ADT/StringRef.h" 21*8bcb0991SDimitry Andric #include "llvm/ADT/UniqueVector.h" 22*8bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h" 23*8bcb0991SDimitry Andric #include "llvm/TableGen/Record.h" 24*8bcb0991SDimitry Andric #include <set> 25*8bcb0991SDimitry Andric #include <unordered_map> 26*8bcb0991SDimitry Andric 27*8bcb0991SDimitry Andric namespace llvm { 28*8bcb0991SDimitry Andric 29*8bcb0991SDimitry Andric class raw_ostream; 30*8bcb0991SDimitry Andric /// Construct a deterministic finite state automaton from possible 31*8bcb0991SDimitry Andric /// nondeterministic state and transition data. 32*8bcb0991SDimitry Andric /// 33*8bcb0991SDimitry Andric /// The state type is a 64-bit unsigned integer. The generated automaton is 34*8bcb0991SDimitry Andric /// invariant to the sparsity of the state representation - its size is only 35*8bcb0991SDimitry Andric /// a function of the cardinality of the set of states. 36*8bcb0991SDimitry Andric /// 37*8bcb0991SDimitry Andric /// The inputs to this emitter are considered to define a nondeterministic 38*8bcb0991SDimitry Andric /// finite state automaton (NFA). This is then converted to a DFA during 39*8bcb0991SDimitry Andric /// emission. The emitted tables can be used to by 40*8bcb0991SDimitry Andric /// include/llvm/Support/Automaton.h. 41*8bcb0991SDimitry Andric class DfaEmitter { 42*8bcb0991SDimitry Andric public: 43*8bcb0991SDimitry Andric // The type of an NFA state. The initial state is always zero. 44*8bcb0991SDimitry Andric using state_type = uint64_t; 45*8bcb0991SDimitry Andric // The type of an action. 46*8bcb0991SDimitry Andric using action_type = uint64_t; 47*8bcb0991SDimitry Andric 48*8bcb0991SDimitry Andric DfaEmitter() = default; 49*8bcb0991SDimitry Andric virtual ~DfaEmitter() = default; 50*8bcb0991SDimitry Andric 51*8bcb0991SDimitry Andric void addTransition(state_type From, state_type To, action_type A); 52*8bcb0991SDimitry Andric void emit(StringRef Name, raw_ostream &OS); 53*8bcb0991SDimitry Andric 54*8bcb0991SDimitry Andric protected: 55*8bcb0991SDimitry Andric /// Emit the C++ type of an action to OS. 56*8bcb0991SDimitry Andric virtual void printActionType(raw_ostream &OS); 57*8bcb0991SDimitry Andric /// Emit the C++ value of an action A to OS. 58*8bcb0991SDimitry Andric virtual void printActionValue(action_type A, raw_ostream &OS); 59*8bcb0991SDimitry Andric 60*8bcb0991SDimitry Andric private: 61*8bcb0991SDimitry Andric /// The state type of deterministic states. These are only used internally to 62*8bcb0991SDimitry Andric /// this class. This is an ID into the DfaStates UniqueVector. 63*8bcb0991SDimitry Andric using dfa_state_type = unsigned; 64*8bcb0991SDimitry Andric 65*8bcb0991SDimitry Andric /// The actual representation of a DFA state, which is a union of one or more 66*8bcb0991SDimitry Andric /// NFA states. 67*8bcb0991SDimitry Andric using DfaState = SmallVector<state_type, 4>; 68*8bcb0991SDimitry Andric 69*8bcb0991SDimitry Andric /// A DFA transition consists of a set of NFA states transitioning to a 70*8bcb0991SDimitry Andric /// new set of NFA states. The DfaTransitionInfo tracks, for every 71*8bcb0991SDimitry Andric /// transitioned-from NFA state, a set of valid transitioned-to states. 72*8bcb0991SDimitry Andric /// 73*8bcb0991SDimitry Andric /// Emission of this transition relation allows algorithmic determination of 74*8bcb0991SDimitry Andric /// the possible candidate NFA paths taken under a given input sequence to 75*8bcb0991SDimitry Andric /// reach a given DFA state. 76*8bcb0991SDimitry Andric using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>; 77*8bcb0991SDimitry Andric 78*8bcb0991SDimitry Andric /// The set of all possible actions. 79*8bcb0991SDimitry Andric std::set<action_type> Actions; 80*8bcb0991SDimitry Andric 81*8bcb0991SDimitry Andric /// The set of nondeterministic transitions. A state-action pair can 82*8bcb0991SDimitry Andric /// transition to multiple target states. 83*8bcb0991SDimitry Andric std::map<std::pair<state_type, action_type>, std::vector<state_type>> 84*8bcb0991SDimitry Andric NfaTransitions; 85*8bcb0991SDimitry Andric std::set<state_type> NfaStates; 86*8bcb0991SDimitry Andric unsigned NumNfaTransitions = 0; 87*8bcb0991SDimitry Andric 88*8bcb0991SDimitry Andric /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID, 89*8bcb0991SDimitry Andric /// which is dfa_state_type. Note that because UniqueVector reserves state 90*8bcb0991SDimitry Andric /// zero, the initial DFA state is always 1. 91*8bcb0991SDimitry Andric UniqueVector<DfaState> DfaStates; 92*8bcb0991SDimitry Andric /// The set of deterministic transitions. A state-action pair has only a 93*8bcb0991SDimitry Andric /// single target state. 94*8bcb0991SDimitry Andric std::map<std::pair<dfa_state_type, action_type>, 95*8bcb0991SDimitry Andric std::pair<dfa_state_type, DfaTransitionInfo>> 96*8bcb0991SDimitry Andric DfaTransitions; 97*8bcb0991SDimitry Andric 98*8bcb0991SDimitry Andric /// Visit all NFA states and construct the DFA. 99*8bcb0991SDimitry Andric void constructDfa(); 100*8bcb0991SDimitry Andric /// Visit a single DFA state and construct all possible transitions to new DFA 101*8bcb0991SDimitry Andric /// states. 102*8bcb0991SDimitry Andric void visitDfaState(DfaState DS); 103*8bcb0991SDimitry Andric }; 104*8bcb0991SDimitry Andric 105*8bcb0991SDimitry Andric } // namespace llvm 106*8bcb0991SDimitry Andric 107*8bcb0991SDimitry Andric #endif 108