xref: /freebsd/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
15 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
16 #include "llvm/MC/MCRegister.h"
17 #include "llvm/MC/MCTargetOptions.h"
18 #include "llvm/Support/Compiler.h"
19 #include "llvm/Support/SMLoc.h"
20 #include "llvm/TargetParser/SubtargetFeature.h"
21 #include <cstdint>
22 #include <memory>
23 
24 namespace llvm {
25 
26 class MCContext;
27 class MCInst;
28 class MCInstrInfo;
29 class MCStreamer;
30 class MCSubtargetInfo;
31 class MCSymbol;
32 template <typename T> class SmallVectorImpl;
33 
34 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
35 
/// Kinds of rewrite that can be recorded against an assembly string.
///
/// The enumerator values are used as indices into AsmRewritePrecedence, so
/// the two must be kept in the same order.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_CallInput,      // Rewrite in terms of ${N:P}.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value for each rewrite kind, indexed by AsmRewriteKind. There is
/// exactly one entry per enumerator, listed in enumerator order.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Catch the table and the enum drifting apart when a new kind is added.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  AOK_IntelExpr + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
63 
64 // Represent the various parts which make up an intel expression,
65 // used for emitting compound intel expressions
66 struct IntelExpr {
67   bool NeedBracs = false;
68   int64_t Imm = 0;
69   StringRef BaseReg;
70   StringRef IndexReg;
71   StringRef OffsetName;
72   unsigned Scale = 1;
73 
74   IntelExpr() = default;
75   // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
IntelExprIntelExpr76   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
77             StringRef offsetName, int64_t imm, bool needBracs)
78       : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
79         OffsetName(offsetName), Scale(1) {
80     if (scale)
81       Scale = scale;
82   }
hasBaseRegIntelExpr83   bool hasBaseReg() const { return !BaseReg.empty(); }
hasIndexRegIntelExpr84   bool hasIndexReg() const { return !IndexReg.empty(); }
hasRegsIntelExpr85   bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
hasOffsetIntelExpr86   bool hasOffset() const { return !OffsetName.empty(); }
87   // Normally we won't emit immediates unconditionally,
88   // unless we've got no other components
emitImmIntelExpr89   bool emitImm() const { return !(hasRegs() || hasOffset()); }
isValidIntelExpr90   bool isValid() const {
91     return (Scale == 1) ||
92            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
93   }
94 };
95 
96 struct AsmRewrite {
97   AsmRewriteKind Kind;
98   SMLoc Loc;
99   unsigned Len;
100   bool Done;
101   int64_t Val;
102   StringRef Label;
103   IntelExpr IntelExp;
104   bool IntelExpRestricted;
105 
106 public:
107   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0,
108              bool Restricted = false)
KindAsmRewrite109       : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {
110     IntelExpRestricted = Restricted;
111   }
AsmRewriteAsmRewrite112   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
113     : AsmRewrite(kind, loc, len) { Label = label; }
AsmRewriteAsmRewrite114   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
115     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
116 };
117 
118 struct ParseInstructionInfo {
119   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
120 
121   ParseInstructionInfo() = default;
ParseInstructionInfoParseInstructionInfo122   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
123     : AsmRewrites(rewrites) {}
124 };
125 
/// Three-state result returned by the various parse* methods.
class ParseStatus {
  enum class StatusTy {
    Success, // Parsing Succeeded
    Failure, // Parsing Failed after consuming some tokens
    NoMatch, // Parsing Failed without consuming any tokens
  };

  StatusTy Status;

public:
  // Expose the enumerators as ParseStatus::Success / Failure / NoMatch.
#if __cplusplus >= 202002L
  using enum StatusTy;
#else
  static constexpr StatusTy Success = StatusTy::Success;
  static constexpr StatusTy Failure = StatusTy::Failure;
  static constexpr StatusTy NoMatch = StatusTy::NoMatch;
#endif

  /// A default-constructed status is NoMatch.
  constexpr ParseStatus() : Status(StatusTy::NoMatch) {}

  /// Wrap one of the three status values.
  constexpr ParseStatus(StatusTy S) : Status(S) {}

  /// Convert from a "true means error" boolean: true becomes Failure and false
  /// becomes Success.
  constexpr ParseStatus(bool Error)
      : Status(Error ? StatusTy::Failure : StatusTy::Success) {}

  /// Any other argument type is rejected, so that values such as integers are
  /// not silently converted through the bool constructor.
  template <typename T> constexpr ParseStatus(T) = delete;

  constexpr bool isSuccess() const { return StatusTy::Success == Status; }
  constexpr bool isFailure() const { return StatusTy::Failure == Status; }
  constexpr bool isNoMatch() const { return StatusTy::NoMatch == Status; }
};
155 
// When an operand is parsed, the assembler will try to iterate through a set of
// possible operand classes that the operand might match and call the
// corresponding PredicateMethod to determine that.
//
// If there are two AsmOperands that would give a specific diagnostic if there
// is no match, there is currently no mechanism to distinguish which operand is
// a closer match. The DiagnosticPredicate distinguishes between 'completely
// no match' and 'near match', so the assembler can decide whether to give a
// specific diagnostic, or use 'InvalidOperand' and continue to find a
// 'better matching' diagnostic.
//
// For example:
//    opcode opnd0, opnd1, opnd2
//
// where:
//    opnd2 could be an 'immediate of range [-8, 7]'
//    opnd2 could be a  'register + shift/extend'.
//
// If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
// little sense to give a diagnostic that the operand should be an immediate
// in range [-8, 7].
//
// This is a light-weight alternative to the 'NearMissInfo' approach
// below which collects *all* possible diagnostics. This alternative
// is optional and fully backward compatible with existing
// PredicateMethods that return a 'bool' (match or near match).
struct DiagnosticPredicate {
  enum PredicateTy {
    Match,     // Matches
    NearMatch, // Close Match: use Specific Diagnostic
    NoMatch,   // No Match: use `InvalidOperand`
  } Predicate;

  // Implicit on purpose: a PredicateMethod can return a PredicateTy directly.
  constexpr DiagnosticPredicate(PredicateTy T) : Predicate(T) {}

  // A plain bool carries only "match" or "near match": true maps to Match and
  // false maps to NearMatch (never NoMatch).
  explicit constexpr DiagnosticPredicate(bool Matches)
      : Predicate(Matches ? Match : NearMatch) {}

  // True only for Match. constexpr like every other accessor, so the
  // conversion can also be used in constant expressions.
  explicit constexpr operator bool() const { return Predicate == Match; }

  constexpr bool isMatch() const { return Predicate == Match; }
  constexpr bool isNearMatch() const { return Predicate == NearMatch; }
  constexpr bool isNoMatch() const { return Predicate == NoMatch; }
};
200 
201 // When matching of an assembly instruction fails, there may be multiple
202 // encodings that are close to being a match. It's often ambiguous which one
203 // the programmer intended to use, so we want to report an error which mentions
204 // each of these "near-miss" encodings. This struct contains information about
205 // one such encoding, and why it did not match the parsed instruction.
206 class NearMissInfo {
207 public:
208   enum NearMissKind {
209     NoNearMiss,
210     NearMissOperand,
211     NearMissFeature,
212     NearMissPredicate,
213     NearMissTooFewOperands,
214   };
215 
216   // The encoding is valid for the parsed assembly string. This is only used
217   // internally to the table-generated assembly matcher.
getSuccess()218   static NearMissInfo getSuccess() { return NearMissInfo(); }
219 
220   // The instruction encoding is not valid because it requires some target
221   // features that are not currently enabled. MissingFeatures has a bit set for
222   // each feature that the encoding needs but which is not enabled.
getMissedFeature(const FeatureBitset & MissingFeatures)223   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
224     NearMissInfo Result;
225     Result.Kind = NearMissFeature;
226     Result.Features = MissingFeatures;
227     return Result;
228   }
229 
230   // The instruction encoding is not valid because the target-specific
231   // predicate function returned an error code. FailureCode is the
232   // target-specific error code returned by the predicate.
getMissedPredicate(unsigned FailureCode)233   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
234     NearMissInfo Result;
235     Result.Kind = NearMissPredicate;
236     Result.PredicateError = FailureCode;
237     return Result;
238   }
239 
240   // The instruction encoding is not valid because one (and only one) parsed
241   // operand is not of the correct type. OperandError is the error code
242   // relating to the operand class expected by the encoding. OperandClass is
243   // the type of the expected operand. Opcode is the opcode of the encoding.
244   // OperandIndex is the index into the parsed operand list.
getMissedOperand(unsigned OperandError,unsigned OperandClass,unsigned Opcode,unsigned OperandIndex)245   static NearMissInfo getMissedOperand(unsigned OperandError,
246                                        unsigned OperandClass, unsigned Opcode,
247                                        unsigned OperandIndex) {
248     NearMissInfo Result;
249     Result.Kind = NearMissOperand;
250     Result.MissedOperand.Error = OperandError;
251     Result.MissedOperand.Class = OperandClass;
252     Result.MissedOperand.Opcode = Opcode;
253     Result.MissedOperand.Index = OperandIndex;
254     return Result;
255   }
256 
257   // The instruction encoding is not valid because it expects more operands
258   // than were parsed. OperandClass is the class of the expected operand that
259   // was not provided. Opcode is the instruction encoding.
getTooFewOperands(unsigned OperandClass,unsigned Opcode)260   static NearMissInfo getTooFewOperands(unsigned OperandClass,
261                                         unsigned Opcode) {
262     NearMissInfo Result;
263     Result.Kind = NearMissTooFewOperands;
264     Result.TooFewOperands.Class = OperandClass;
265     Result.TooFewOperands.Opcode = Opcode;
266     return Result;
267   }
268 
269   operator bool() const { return Kind != NoNearMiss; }
270 
getKind()271   NearMissKind getKind() const { return Kind; }
272 
273   // Feature flags required by the instruction, that the current target does
274   // not have.
getFeatures()275   const FeatureBitset& getFeatures() const {
276     assert(Kind == NearMissFeature);
277     return Features;
278   }
279   // Error code returned by the target predicate when validating this
280   // instruction encoding.
getPredicateError()281   unsigned getPredicateError() const {
282     assert(Kind == NearMissPredicate);
283     return PredicateError;
284   }
285   // MatchClassKind of the operand that we expected to see.
getOperandClass()286   unsigned getOperandClass() const {
287     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
288     return MissedOperand.Class;
289   }
290   // Opcode of the encoding we were trying to match.
getOpcode()291   unsigned getOpcode() const {
292     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
293     return MissedOperand.Opcode;
294   }
295   // Error code returned when validating the operand.
getOperandError()296   unsigned getOperandError() const {
297     assert(Kind == NearMissOperand);
298     return MissedOperand.Error;
299   }
300   // Index of the actual operand we were trying to match in the list of parsed
301   // operands.
getOperandIndex()302   unsigned getOperandIndex() const {
303     assert(Kind == NearMissOperand);
304     return MissedOperand.Index;
305   }
306 
307 private:
308   NearMissKind Kind;
309 
310   // These two structs share a common prefix, so we can safely rely on the fact
311   // that they overlap in the union.
312   struct MissedOpInfo {
313     unsigned Class;
314     unsigned Opcode;
315     unsigned Error;
316     unsigned Index;
317   };
318 
319   struct TooFewOperandsInfo {
320     unsigned Class;
321     unsigned Opcode;
322   };
323 
324   union {
325     FeatureBitset Features;
326     unsigned PredicateError;
327     MissedOpInfo MissedOperand;
328     TooFewOperandsInfo TooFewOperands;
329   };
330 
NearMissInfo()331   NearMissInfo() : Kind(NoNearMiss) {}
332 };
333 
334 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
335 class LLVM_ABI MCTargetAsmParser : public MCAsmParserExtension {
336 public:
337   enum MatchResultTy {
338     Match_InvalidOperand,
339     Match_InvalidTiedOperand,
340     Match_MissingFeature,
341     Match_MnemonicFail,
342     Match_Success,
343     Match_NearMisses,
344     FIRST_TARGET_MATCH_RESULT_TY
345   };
346 
347 protected: // Can only create subclasses.
348   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
349                     const MCInstrInfo &MII);
350 
351   /// Create a copy of STI and return a non-const reference to it.
352   MCSubtargetInfo &copySTI();
353 
354   /// AvailableFeatures - The current set of available features.
355   FeatureBitset AvailableFeatures;
356 
357   /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
358   bool ParsingMSInlineAsm = false;
359 
360   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
361   /// ms-style inline assembly.
362   MCAsmParserSemaCallback *SemaCallback = nullptr;
363 
364   /// Set of options which affects instrumentation of inline assembly.
365   MCTargetOptions MCOptions;
366 
367   /// Current STI.
368   const MCSubtargetInfo *STI;
369 
370   const MCInstrInfo &MII;
371 
372 public:
373   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
374   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
375 
376   ~MCTargetAsmParser() override;
377 
378   const MCSubtargetInfo &getSTI() const;
379 
getAvailableFeatures()380   const FeatureBitset& getAvailableFeatures() const {
381     return AvailableFeatures;
382   }
setAvailableFeatures(const FeatureBitset & Value)383   void setAvailableFeatures(const FeatureBitset& Value) {
384     AvailableFeatures = Value;
385   }
386 
isParsingMSInlineAsm()387   bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
setParsingMSInlineAsm(bool Value)388   void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
389 
getTargetOptions()390   MCTargetOptions getTargetOptions() const { return MCOptions; }
391 
setSemaCallback(MCAsmParserSemaCallback * Callback)392   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
393     SemaCallback = Callback;
394   }
395 
396   // Target-specific parsing of expression.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)397   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
398     return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
399   }
400   // Parse an expression in a data directive, possibly with a relocation
401   // specifier.
parseDataExpr(const MCExpr * & Res)402   virtual bool parseDataExpr(const MCExpr *&Res) {
403     SMLoc EndLoc;
404     return getParser().parseExpression(Res, EndLoc);
405   }
406 
407   virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc,
408                              SMLoc &EndLoc) = 0;
409 
410   /// tryParseRegister - parse one register if possible
411   ///
412   /// Check whether a register specification can be parsed at the current
413   /// location, without failing the entire parse if it can't. Must not consume
414   /// tokens if the parse fails.
415   virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
416                                        SMLoc &EndLoc) = 0;
417 
418   /// Parse one assembly instruction.
419   ///
420   /// The parser is positioned following the instruction name. The target
421   /// specific instruction parser should parse the entire instruction and
422   /// construct the appropriate MCInst, or emit an error. On success, the entire
423   /// line should be parsed up to and including the end-of-statement token. On
424   /// failure, the parser is not required to read to the end of the line.
425   //
426   /// \param Name - The instruction name.
427   /// \param NameLoc - The source location of the name.
428   /// \param Operands [out] - The list of parsed operands, this returns
429   ///        ownership of them to the caller.
430   /// \return True on failure.
431   virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
432                                 SMLoc NameLoc, OperandVector &Operands) = 0;
parseInstruction(ParseInstructionInfo & Info,StringRef Name,AsmToken Token,OperandVector & Operands)433   virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
434                                 AsmToken Token, OperandVector &Operands) {
435     return parseInstruction(Info, Name, Token.getLoc(), Operands);
436   }
437 
438   /// ParseDirective - Parse a target specific assembler directive
439   /// This method is deprecated, use 'parseDirective' instead.
440   ///
441   /// The parser is positioned following the directive name.  The target
442   /// specific directive parser should parse the entire directive doing or
443   /// recording any target specific work, or return true and do nothing if the
444   /// directive is not target specific. If the directive is specific for
445   /// the target, the entire line is parsed up to and including the
446   /// end-of-statement token and false is returned.
447   ///
448   /// \param DirectiveID - the identifier token of the directive.
ParseDirective(AsmToken DirectiveID)449   virtual bool ParseDirective(AsmToken DirectiveID) { return true; }
450 
451   /// Parses a target-specific assembler directive.
452   ///
453   /// The parser is positioned following the directive name. The target-specific
454   /// directive parser should parse the entire directive doing or recording any
455   /// target-specific work, or emit an error. On success, the entire line should
456   /// be parsed up to and including the end-of-statement token. On failure, the
457   /// parser is not required to read to the end of the line. If the directive is
458   /// not target-specific, no tokens should be consumed and NoMatch is returned.
459   ///
460   /// \param DirectiveID - The token identifying the directive.
461   virtual ParseStatus parseDirective(AsmToken DirectiveID);
462 
463   /// Recognize a series of operands of a parsed
464   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
465   /// This returns false on success and returns true on failure to match.
466   ///
467   /// On failure, the target parser is responsible for emitting a diagnostic
468   /// explaining the match failure.
469   virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
470                                        OperandVector &Operands, MCStreamer &Out,
471                                        uint64_t &ErrorInfo,
472                                        bool MatchingInlineAsm) = 0;
473 
474   /// Allows targets to let registers opt out of clobber lists.
omitRegisterFromClobberLists(MCRegister Reg)475   virtual bool omitRegisterFromClobberLists(MCRegister Reg) { return false; }
476 
477   /// Allow a target to add special case operand matching for things that
478   /// tblgen doesn't/can't handle effectively. For example, literal
479   /// immediates on ARM. TableGen expects a token operand, but the parser
480   /// will recognize them as immediates.
validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)481   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
482                                               unsigned Kind) {
483     return Match_InvalidOperand;
484   }
485 
486   /// Validate the instruction match against any complex target predicates
487   /// before rendering any operands to it.
488   virtual unsigned
checkEarlyTargetMatchPredicate(MCInst & Inst,const OperandVector & Operands)489   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
490     return Match_Success;
491   }
492 
493   /// checkTargetMatchPredicate - Validate the instruction match against
494   /// any complex target predicates not expressible via match classes.
checkTargetMatchPredicate(MCInst & Inst)495   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
496     return Match_Success;
497   }
498 
499   virtual void convertToMapAndConstraints(unsigned Kind,
500                                           const OperandVector &Operands) = 0;
501 
502   /// Returns whether two operands are registers and are equal. This is used
503   /// by the tied-operands checks in the AsmMatcher. This method can be
504   /// overridden to allow e.g. a sub- or super-register as the tied operand.
505   virtual bool areEqualRegs(const MCParsedAsmOperand &Op1,
506                             const MCParsedAsmOperand &Op2) const;
507 
508   // Return whether this parser uses assignment statements with equals tokens
equalIsAsmAssignment()509   virtual bool equalIsAsmAssignment() { return true; };
510   // Return whether this start of statement identifier is a label
isLabel(AsmToken & Token)511   virtual bool isLabel(AsmToken &Token) { return true; };
512   // Return whether this parser accepts the given token as start of statement.
tokenIsStartOfStatement(AsmToken::TokenKind Token)513   virtual bool tokenIsStartOfStatement(AsmToken::TokenKind Token) {
514     return false;
515   }
516 
applySpecifier(const MCExpr * E,uint32_t,MCContext & Ctx)517   virtual const MCExpr *applySpecifier(const MCExpr *E, uint32_t,
518                                        MCContext &Ctx) {
519     return nullptr;
520   }
521 
522   // For actions that have to be performed before a label is emitted
doBeforeLabelEmit(MCSymbol * Symbol,SMLoc IDLoc)523   virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {}
524 
onLabelParsed(MCSymbol * Symbol)525   virtual void onLabelParsed(MCSymbol *Symbol) {}
526 
527   /// Ensure that all previously parsed instructions have been emitted to the
528   /// output streamer, if the target does not emit them immediately.
flushPendingInstructions(MCStreamer & Out)529   virtual void flushPendingInstructions(MCStreamer &Out) {}
530 
531   // For any initialization at the beginning of parsing.
onBeginOfFile()532   virtual void onBeginOfFile() {}
533 
534   // For any checks or cleanups at the end of parsing.
onEndOfFile()535   virtual void onEndOfFile() {}
536 };
537 
538 } // end namespace llvm
539 
540 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
541