1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 15 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16 #include "llvm/MC/MCRegister.h" 17 #include "llvm/MC/MCTargetOptions.h" 18 #include "llvm/Support/Compiler.h" 19 #include "llvm/Support/SMLoc.h" 20 #include "llvm/TargetParser/SubtargetFeature.h" 21 #include <cstdint> 22 #include <memory> 23 24 namespace llvm { 25 26 class MCContext; 27 class MCInst; 28 class MCInstrInfo; 29 class MCStreamer; 30 class MCSubtargetInfo; 31 class MCSymbol; 32 template <typename T> class SmallVectorImpl; 33 34 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 35 36 enum AsmRewriteKind { 37 AOK_Align, // Rewrite align as .align. 38 AOK_EVEN, // Rewrite even as .even. 39 AOK_Emit, // Rewrite _emit as .byte. 40 AOK_CallInput, // Rewrite in terms of ${N:P}. 41 AOK_Input, // Rewrite in terms of $N. 42 AOK_Output, // Rewrite in terms of $N. 43 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 44 AOK_Label, // Rewrite local labels. 45 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 46 AOK_Skip, // Skip emission (e.g., offset/type operators). 47 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 48 }; 49 50 const char AsmRewritePrecedence [] = { 51 2, // AOK_Align 52 2, // AOK_EVEN 53 2, // AOK_Emit 54 3, // AOK_Input 55 3, // AOK_CallInput 56 3, // AOK_Output 57 5, // AOK_SizeDirective 58 1, // AOK_Label 59 5, // AOK_EndOfStatement 60 2, // AOK_Skip 61 2 // AOK_IntelExpr 62 }; 63 64 // Represent the various parts which make up an intel expression, 65 // used for emitting compound intel expressions 66 struct IntelExpr { 67 bool NeedBracs = false; 68 int64_t Imm = 0; 69 StringRef BaseReg; 70 StringRef IndexReg; 71 StringRef OffsetName; 72 unsigned Scale = 1; 73 74 IntelExpr() = default; 75 // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] IntelExprIntelExpr76 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, 77 StringRef offsetName, int64_t imm, bool needBracs) 78 : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), 79 OffsetName(offsetName), Scale(1) { 80 if (scale) 81 Scale = scale; 82 } hasBaseRegIntelExpr83 bool hasBaseReg() const { return !BaseReg.empty(); } hasIndexRegIntelExpr84 bool hasIndexReg() const { return !IndexReg.empty(); } hasRegsIntelExpr85 bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } hasOffsetIntelExpr86 bool hasOffset() const { return !OffsetName.empty(); } 87 // Normally we won't emit immediates unconditionally, 88 // unless we've got no other components emitImmIntelExpr89 bool emitImm() const { return !(hasRegs() || hasOffset()); } isValidIntelExpr90 bool isValid() const { 91 return (Scale == 1) || 92 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 93 } 94 }; 95 96 struct AsmRewrite { 97 AsmRewriteKind Kind; 98 SMLoc Loc; 99 unsigned Len; 100 bool Done; 101 int64_t Val; 102 StringRef Label; 103 IntelExpr IntelExp; 104 bool IntelExpRestricted; 105 106 public: 107 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0, 108 bool Restricted = false) KindAsmRewrite109 : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) { 110 IntelExpRestricted = Restricted; 111 } AsmRewriteAsmRewrite112 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 113 : AsmRewrite(kind, loc, len) { Label = label; } AsmRewriteAsmRewrite114 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 115 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 116 }; 117 118 struct ParseInstructionInfo { 119 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 120 121 ParseInstructionInfo() = default; ParseInstructionInfoParseInstructionInfo122 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 123 : AsmRewrites(rewrites) {} 124 }; 125 126 /// Ternary parse status returned by various parse* methods. 127 class ParseStatus { 128 enum class StatusTy { 129 Success, // Parsing Succeeded 130 Failure, // Parsing Failed after consuming some tokens 131 NoMatch, // Parsing Failed without consuming any tokens 132 } Status; 133 134 public: 135 #if __cplusplus >= 202002L 136 using enum StatusTy; 137 #else 138 static constexpr StatusTy Success = StatusTy::Success; 139 static constexpr StatusTy Failure = StatusTy::Failure; 140 static constexpr StatusTy NoMatch = StatusTy::NoMatch; 141 #endif 142 ParseStatus()143 constexpr ParseStatus() : Status(NoMatch) {} 144 ParseStatus(StatusTy Status)145 constexpr ParseStatus(StatusTy Status) : Status(Status) {} 146 ParseStatus(bool Error)147 constexpr ParseStatus(bool Error) : Status(Error ? Failure : Success) {} 148 149 template <typename T> constexpr ParseStatus(T) = delete; 150 isSuccess()151 constexpr bool isSuccess() const { return Status == StatusTy::Success; } isFailure()152 constexpr bool isFailure() const { return Status == StatusTy::Failure; } isNoMatch()153 constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; } 154 }; 155 156 // When an operand is parsed, the assembler will try to iterate through a set of 157 // possible operand classes that the operand might match and call the 158 // corresponding PredicateMethod to determine that. 159 // 160 // If there are two AsmOperands that would give a specific diagnostic if there 161 // is no match, there is currently no mechanism to distinguish which operand is 162 // a closer match. The DiagnosticPredicate distinguishes between 'completely 163 // no match' and 'near match', so the assembler can decide whether to give a 164 // specific diagnostic, or use 'InvalidOperand' and continue to find a 165 // 'better matching' diagnostic. 166 // 167 // For example: 168 // opcode opnd0, onpd1, opnd2 169 // 170 // where: 171 // opnd2 could be an 'immediate of range [-8, 7]' 172 // opnd2 could be a 'register + shift/extend'. 173 // 174 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes 175 // little sense to give a diagnostic that the operand should be an immediate 176 // in range [-8, 7]. 177 // 178 // This is a light-weight alternative to the 'NearMissInfo' approach 179 // below which collects *all* possible diagnostics. This alternative 180 // is optional and fully backward compatible with existing 181 // PredicateMethods that return a 'bool' (match or near match). 182 struct DiagnosticPredicate { 183 enum PredicateTy { 184 Match, // Matches 185 NearMatch, // Close Match: use Specific Diagnostic 186 NoMatch, // No Match: use `InvalidOperand` 187 } Predicate; 188 DiagnosticPredicateDiagnosticPredicate189 constexpr DiagnosticPredicate(PredicateTy T) : Predicate(T) {} 190 DiagnosticPredicateDiagnosticPredicate191 explicit constexpr DiagnosticPredicate(bool Matches) 192 : Predicate(Matches ? Match : NearMatch) {} 193 194 explicit operator bool() const { return Predicate == Match; } 195 isMatchDiagnosticPredicate196 constexpr bool isMatch() const { return Predicate == Match; } isNearMatchDiagnosticPredicate197 constexpr bool isNearMatch() const { return Predicate == NearMatch; } isNoMatchDiagnosticPredicate198 constexpr bool isNoMatch() const { return Predicate == NoMatch; } 199 }; 200 201 // When matching of an assembly instruction fails, there may be multiple 202 // encodings that are close to being a match. It's often ambiguous which one 203 // the programmer intended to use, so we want to report an error which mentions 204 // each of these "near-miss" encodings. This struct contains information about 205 // one such encoding, and why it did not match the parsed instruction. 206 class NearMissInfo { 207 public: 208 enum NearMissKind { 209 NoNearMiss, 210 NearMissOperand, 211 NearMissFeature, 212 NearMissPredicate, 213 NearMissTooFewOperands, 214 }; 215 216 // The encoding is valid for the parsed assembly string. This is only used 217 // internally to the table-generated assembly matcher. getSuccess()218 static NearMissInfo getSuccess() { return NearMissInfo(); } 219 220 // The instruction encoding is not valid because it requires some target 221 // features that are not currently enabled. MissingFeatures has a bit set for 222 // each feature that the encoding needs but which is not enabled. getMissedFeature(const FeatureBitset & MissingFeatures)223 static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) { 224 NearMissInfo Result; 225 Result.Kind = NearMissFeature; 226 Result.Features = MissingFeatures; 227 return Result; 228 } 229 230 // The instruction encoding is not valid because the target-specific 231 // predicate function returned an error code. FailureCode is the 232 // target-specific error code returned by the predicate. getMissedPredicate(unsigned FailureCode)233 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 234 NearMissInfo Result; 235 Result.Kind = NearMissPredicate; 236 Result.PredicateError = FailureCode; 237 return Result; 238 } 239 240 // The instruction encoding is not valid because one (and only one) parsed 241 // operand is not of the correct type. OperandError is the error code 242 // relating to the operand class expected by the encoding. OperandClass is 243 // the type of the expected operand. Opcode is the opcode of the encoding. 244 // OperandIndex is the index into the parsed operand list. getMissedOperand(unsigned OperandError,unsigned OperandClass,unsigned Opcode,unsigned OperandIndex)245 static NearMissInfo getMissedOperand(unsigned OperandError, 246 unsigned OperandClass, unsigned Opcode, 247 unsigned OperandIndex) { 248 NearMissInfo Result; 249 Result.Kind = NearMissOperand; 250 Result.MissedOperand.Error = OperandError; 251 Result.MissedOperand.Class = OperandClass; 252 Result.MissedOperand.Opcode = Opcode; 253 Result.MissedOperand.Index = OperandIndex; 254 return Result; 255 } 256 257 // The instruction encoding is not valid because it expects more operands 258 // than were parsed. OperandClass is the class of the expected operand that 259 // was not provided. Opcode is the instruction encoding. getTooFewOperands(unsigned OperandClass,unsigned Opcode)260 static NearMissInfo getTooFewOperands(unsigned OperandClass, 261 unsigned Opcode) { 262 NearMissInfo Result; 263 Result.Kind = NearMissTooFewOperands; 264 Result.TooFewOperands.Class = OperandClass; 265 Result.TooFewOperands.Opcode = Opcode; 266 return Result; 267 } 268 269 operator bool() const { return Kind != NoNearMiss; } 270 getKind()271 NearMissKind getKind() const { return Kind; } 272 273 // Feature flags required by the instruction, that the current target does 274 // not have. getFeatures()275 const FeatureBitset& getFeatures() const { 276 assert(Kind == NearMissFeature); 277 return Features; 278 } 279 // Error code returned by the target predicate when validating this 280 // instruction encoding. getPredicateError()281 unsigned getPredicateError() const { 282 assert(Kind == NearMissPredicate); 283 return PredicateError; 284 } 285 // MatchClassKind of the operand that we expected to see. getOperandClass()286 unsigned getOperandClass() const { 287 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 288 return MissedOperand.Class; 289 } 290 // Opcode of the encoding we were trying to match. getOpcode()291 unsigned getOpcode() const { 292 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 293 return MissedOperand.Opcode; 294 } 295 // Error code returned when validating the operand. getOperandError()296 unsigned getOperandError() const { 297 assert(Kind == NearMissOperand); 298 return MissedOperand.Error; 299 } 300 // Index of the actual operand we were trying to match in the list of parsed 301 // operands. getOperandIndex()302 unsigned getOperandIndex() const { 303 assert(Kind == NearMissOperand); 304 return MissedOperand.Index; 305 } 306 307 private: 308 NearMissKind Kind; 309 310 // These two structs share a common prefix, so we can safely rely on the fact 311 // that they overlap in the union. 312 struct MissedOpInfo { 313 unsigned Class; 314 unsigned Opcode; 315 unsigned Error; 316 unsigned Index; 317 }; 318 319 struct TooFewOperandsInfo { 320 unsigned Class; 321 unsigned Opcode; 322 }; 323 324 union { 325 FeatureBitset Features; 326 unsigned PredicateError; 327 MissedOpInfo MissedOperand; 328 TooFewOperandsInfo TooFewOperands; 329 }; 330 NearMissInfo()331 NearMissInfo() : Kind(NoNearMiss) {} 332 }; 333 334 /// MCTargetAsmParser - Generic interface to target specific assembly parsers. 335 class LLVM_ABI MCTargetAsmParser : public MCAsmParserExtension { 336 public: 337 enum MatchResultTy { 338 Match_InvalidOperand, 339 Match_InvalidTiedOperand, 340 Match_MissingFeature, 341 Match_MnemonicFail, 342 Match_Success, 343 Match_NearMisses, 344 FIRST_TARGET_MATCH_RESULT_TY 345 }; 346 347 protected: // Can only create subclasses. 348 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 349 const MCInstrInfo &MII); 350 351 /// Create a copy of STI and return a non-const reference to it. 352 MCSubtargetInfo ©STI(); 353 354 /// AvailableFeatures - The current set of available features. 355 FeatureBitset AvailableFeatures; 356 357 /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly? 358 bool ParsingMSInlineAsm = false; 359 360 /// SemaCallback - The Sema callback implementation. Must be set when parsing 361 /// ms-style inline assembly. 362 MCAsmParserSemaCallback *SemaCallback = nullptr; 363 364 /// Set of options which affects instrumentation of inline assembly. 365 MCTargetOptions MCOptions; 366 367 /// Current STI. 368 const MCSubtargetInfo *STI; 369 370 const MCInstrInfo &MII; 371 372 public: 373 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 374 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 375 376 ~MCTargetAsmParser() override; 377 378 const MCSubtargetInfo &getSTI() const; 379 getAvailableFeatures()380 const FeatureBitset& getAvailableFeatures() const { 381 return AvailableFeatures; 382 } setAvailableFeatures(const FeatureBitset & Value)383 void setAvailableFeatures(const FeatureBitset& Value) { 384 AvailableFeatures = Value; 385 } 386 isParsingMSInlineAsm()387 bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; } setParsingMSInlineAsm(bool Value)388 void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; } 389 getTargetOptions()390 MCTargetOptions getTargetOptions() const { return MCOptions; } 391 setSemaCallback(MCAsmParserSemaCallback * Callback)392 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 393 SemaCallback = Callback; 394 } 395 396 // Target-specific parsing of expression. parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)397 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 398 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 399 } 400 // Parse an expression in a data directive, possibly with a relocation 401 // specifier. parseDataExpr(const MCExpr * & Res)402 virtual bool parseDataExpr(const MCExpr *&Res) { 403 SMLoc EndLoc; 404 return getParser().parseExpression(Res, EndLoc); 405 } 406 407 virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, 408 SMLoc &EndLoc) = 0; 409 410 /// tryParseRegister - parse one register if possible 411 /// 412 /// Check whether a register specification can be parsed at the current 413 /// location, without failing the entire parse if it can't. Must not consume 414 /// tokens if the parse fails. 415 virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 416 SMLoc &EndLoc) = 0; 417 418 /// Parse one assembly instruction. 419 /// 420 /// The parser is positioned following the instruction name. The target 421 /// specific instruction parser should parse the entire instruction and 422 /// construct the appropriate MCInst, or emit an error. On success, the entire 423 /// line should be parsed up to and including the end-of-statement token. On 424 /// failure, the parser is not required to read to the end of the line. 425 // 426 /// \param Name - The instruction name. 427 /// \param NameLoc - The source location of the name. 428 /// \param Operands [out] - The list of parsed operands, this returns 429 /// ownership of them to the caller. 430 /// \return True on failure. 431 virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, 432 SMLoc NameLoc, OperandVector &Operands) = 0; parseInstruction(ParseInstructionInfo & Info,StringRef Name,AsmToken Token,OperandVector & Operands)433 virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, 434 AsmToken Token, OperandVector &Operands) { 435 return parseInstruction(Info, Name, Token.getLoc(), Operands); 436 } 437 438 /// ParseDirective - Parse a target specific assembler directive 439 /// This method is deprecated, use 'parseDirective' instead. 440 /// 441 /// The parser is positioned following the directive name. The target 442 /// specific directive parser should parse the entire directive doing or 443 /// recording any target specific work, or return true and do nothing if the 444 /// directive is not target specific. If the directive is specific for 445 /// the target, the entire line is parsed up to and including the 446 /// end-of-statement token and false is returned. 447 /// 448 /// \param DirectiveID - the identifier token of the directive. ParseDirective(AsmToken DirectiveID)449 virtual bool ParseDirective(AsmToken DirectiveID) { return true; } 450 451 /// Parses a target-specific assembler directive. 452 /// 453 /// The parser is positioned following the directive name. The target-specific 454 /// directive parser should parse the entire directive doing or recording any 455 /// target-specific work, or emit an error. On success, the entire line should 456 /// be parsed up to and including the end-of-statement token. On failure, the 457 /// parser is not required to read to the end of the line. If the directive is 458 /// not target-specific, no tokens should be consumed and NoMatch is returned. 459 /// 460 /// \param DirectiveID - The token identifying the directive. 461 virtual ParseStatus parseDirective(AsmToken DirectiveID); 462 463 /// Recognize a series of operands of a parsed 464 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 465 /// This returns false on success and returns true on failure to match. 466 /// 467 /// On failure, the target parser is responsible for emitting a diagnostic 468 /// explaining the match failure. 469 virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 470 OperandVector &Operands, MCStreamer &Out, 471 uint64_t &ErrorInfo, 472 bool MatchingInlineAsm) = 0; 473 474 /// Allows targets to let registers opt out of clobber lists. omitRegisterFromClobberLists(MCRegister Reg)475 virtual bool omitRegisterFromClobberLists(MCRegister Reg) { return false; } 476 477 /// Allow a target to add special case operand matching for things that 478 /// tblgen doesn't/can't handle effectively. For example, literal 479 /// immediates on ARM. TableGen expects a token operand, but the parser 480 /// will recognize them as immediates. validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)481 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 482 unsigned Kind) { 483 return Match_InvalidOperand; 484 } 485 486 /// Validate the instruction match against any complex target predicates 487 /// before rendering any operands to it. 488 virtual unsigned checkEarlyTargetMatchPredicate(MCInst & Inst,const OperandVector & Operands)489 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 490 return Match_Success; 491 } 492 493 /// checkTargetMatchPredicate - Validate the instruction match against 494 /// any complex target predicates not expressible via match classes. checkTargetMatchPredicate(MCInst & Inst)495 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 496 return Match_Success; 497 } 498 499 virtual void convertToMapAndConstraints(unsigned Kind, 500 const OperandVector &Operands) = 0; 501 502 /// Returns whether two operands are registers and are equal. This is used 503 /// by the tied-operands checks in the AsmMatcher. This method can be 504 /// overridden to allow e.g. a sub- or super-register as the tied operand. 505 virtual bool areEqualRegs(const MCParsedAsmOperand &Op1, 506 const MCParsedAsmOperand &Op2) const; 507 508 // Return whether this parser uses assignment statements with equals tokens equalIsAsmAssignment()509 virtual bool equalIsAsmAssignment() { return true; }; 510 // Return whether this start of statement identifier is a label isLabel(AsmToken & Token)511 virtual bool isLabel(AsmToken &Token) { return true; }; 512 // Return whether this parser accepts the given token as start of statement. tokenIsStartOfStatement(AsmToken::TokenKind Token)513 virtual bool tokenIsStartOfStatement(AsmToken::TokenKind Token) { 514 return false; 515 } 516 applySpecifier(const MCExpr * E,uint32_t,MCContext & Ctx)517 virtual const MCExpr *applySpecifier(const MCExpr *E, uint32_t, 518 MCContext &Ctx) { 519 return nullptr; 520 } 521 522 // For actions that have to be performed before a label is emitted doBeforeLabelEmit(MCSymbol * Symbol,SMLoc IDLoc)523 virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {} 524 onLabelParsed(MCSymbol * Symbol)525 virtual void onLabelParsed(MCSymbol *Symbol) {} 526 527 /// Ensure that all previously parsed instructions have been emitted to the 528 /// output streamer, if the target does not emit them immediately. flushPendingInstructions(MCStreamer & Out)529 virtual void flushPendingInstructions(MCStreamer &Out) {} 530 531 // For any initialization at the beginning of parsing. onBeginOfFile()532 virtual void onBeginOfFile() {} 533 534 // For any checks or cleanups at the end of parsing. onEndOfFile()535 virtual void onEndOfFile() {} 536 }; 537 538 } // end namespace llvm 539 540 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 541