xref: /freebsd/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCAsmInfo.h"
10 #include "MCTargetDesc/BPFMCTargetDesc.h"
11 #include "TargetInfo/BPFTargetInfo.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/AsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCStreamer.h"
21 #include "llvm/MC/MCSubtargetInfo.h"
22 #include "llvm/MC/TargetRegistry.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/Compiler.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
31 class BPFAsmParser : public MCTargetAsmParser {
32 
33   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34 
35   bool PreMatchCheck(OperandVector &Operands);
36 
37   bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38                                OperandVector &Operands, MCStreamer &Out,
39                                uint64_t &ErrorInfo,
40                                bool MatchingInlineAsm) override;
41 
42   bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
43   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
44                                SMLoc &EndLoc) override;
45 
46   bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
47                         SMLoc NameLoc, OperandVector &Operands) override;
48 
49   // "=" is used as assignment operator for assembly statment, so can't be used
50   // for symbol assignment.
51   bool equalIsAsmAssignment() override { return false; }
52   // "*" is used for dereferencing memory that it will be the start of
53   // statement.
54   bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override {
55     return Token == AsmToken::Star;
56   }
57 
58 #define GET_ASSEMBLER_HEADER
59 #include "BPFGenAsmMatcher.inc"
60 
61   ParseStatus parseImmediate(OperandVector &Operands);
62   ParseStatus parseRegister(OperandVector &Operands);
63   ParseStatus parseOperandAsOperator(OperandVector &Operands);
64 
65 public:
66   enum BPFMatchResultTy {
67     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
68 #define GET_OPERAND_DIAGNOSTIC_TYPES
69 #include "BPFGenAsmMatcher.inc"
70 #undef GET_OPERAND_DIAGNOSTIC_TYPES
71   };
72 
73   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
74                const MCInstrInfo &MII, const MCTargetOptions &Options)
75       : MCTargetAsmParser(Options, STI, MII) {
76     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
77   }
78 };
79 
80 /// BPFOperand - Instances of this class represent a parsed machine
81 /// instruction
82 struct BPFOperand : public MCParsedAsmOperand {
83 
84   enum KindTy {
85     Token,
86     Register,
87     Immediate,
88   } Kind;
89 
90   struct RegOp {
91     MCRegister RegNum;
92   };
93 
94   struct ImmOp {
95     const MCExpr *Val;
96   };
97 
98   SMLoc StartLoc, EndLoc;
99   union {
100     StringRef Tok;
101     RegOp Reg;
102     ImmOp Imm;
103   };
104 
105   BPFOperand(KindTy K) : Kind(K) {}
106 
107 public:
108   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
109     Kind = o.Kind;
110     StartLoc = o.StartLoc;
111     EndLoc = o.EndLoc;
112 
113     switch (Kind) {
114     case Register:
115       Reg = o.Reg;
116       break;
117     case Immediate:
118       Imm = o.Imm;
119       break;
120     case Token:
121       Tok = o.Tok;
122       break;
123     }
124   }
125 
126   bool isToken() const override { return Kind == Token; }
127   bool isReg() const override { return Kind == Register; }
128   bool isImm() const override { return Kind == Immediate; }
129   bool isMem() const override { return false; }
130 
131   bool isConstantImm() const {
132     return isImm() && isa<MCConstantExpr>(getImm());
133   }
134 
135   int64_t getConstantImm() const {
136     const MCExpr *Val = getImm();
137     return static_cast<const MCConstantExpr *>(Val)->getValue();
138   }
139 
140   bool isSImm16() const {
141     return (isConstantImm() && isInt<16>(getConstantImm()));
142   }
143 
144   bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
145 
146   bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
147 
148   /// getStartLoc - Gets location of the first token of this operand
149   SMLoc getStartLoc() const override { return StartLoc; }
150   /// getEndLoc - Gets location of the last token of this operand
151   SMLoc getEndLoc() const override { return EndLoc; }
152 
153   MCRegister getReg() const override {
154     assert(Kind == Register && "Invalid type access!");
155     return Reg.RegNum;
156   }
157 
158   const MCExpr *getImm() const {
159     assert(Kind == Immediate && "Invalid type access!");
160     return Imm.Val;
161   }
162 
163   StringRef getToken() const {
164     assert(Kind == Token && "Invalid type access!");
165     return Tok;
166   }
167 
168   void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
169     switch (Kind) {
170     case Immediate:
171       MAI.printExpr(OS, *getImm());
172       break;
173     case Register:
174       OS << "<register x";
175       OS << getReg() << ">";
176       break;
177     case Token:
178       OS << "'" << getToken() << "'";
179       break;
180     }
181   }
182 
183   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
184     assert(Expr && "Expr shouldn't be null!");
185 
186     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
187       Inst.addOperand(MCOperand::createImm(CE->getValue()));
188     else
189       Inst.addOperand(MCOperand::createExpr(Expr));
190   }
191 
192   // Used by the TableGen Code
193   void addRegOperands(MCInst &Inst, unsigned N) const {
194     assert(N == 1 && "Invalid number of operands!");
195     Inst.addOperand(MCOperand::createReg(getReg()));
196   }
197 
198   void addImmOperands(MCInst &Inst, unsigned N) const {
199     assert(N == 1 && "Invalid number of operands!");
200     addExpr(Inst, getImm());
201   }
202 
203   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
204     auto Op = std::make_unique<BPFOperand>(Token);
205     Op->Tok = Str;
206     Op->StartLoc = S;
207     Op->EndLoc = S;
208     return Op;
209   }
210 
211   static std::unique_ptr<BPFOperand> createReg(MCRegister Reg, SMLoc S,
212                                                SMLoc E) {
213     auto Op = std::make_unique<BPFOperand>(Register);
214     Op->Reg.RegNum = Reg;
215     Op->StartLoc = S;
216     Op->EndLoc = E;
217     return Op;
218   }
219 
220   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
221                                                SMLoc E) {
222     auto Op = std::make_unique<BPFOperand>(Immediate);
223     Op->Imm.Val = Val;
224     Op->StartLoc = S;
225     Op->EndLoc = E;
226     return Op;
227   }
228 
229   // Identifiers that can be used at the start of a statment.
230   static bool isValidIdAtStart(StringRef Name) {
231     return StringSwitch<bool>(Name.lower())
232         .Case("if", true)
233         .Case("call", true)
234         .Case("callx", true)
235         .Case("goto", true)
236         .Case("gotol", true)
237         .Case("may_goto", true)
238         .Case("*", true)
239         .Case("exit", true)
240         .Case("lock", true)
241         .Case("ld_pseudo", true)
242         .Case("store_release", true)
243         .Default(false);
244   }
245 
246   // Identifiers that can be used in the middle of a statment.
247   static bool isValidIdInMiddle(StringRef Name) {
248     return StringSwitch<bool>(Name.lower())
249         .Case("u64", true)
250         .Case("u32", true)
251         .Case("u16", true)
252         .Case("u8", true)
253         .Case("s32", true)
254         .Case("s16", true)
255         .Case("s8", true)
256         .Case("be64", true)
257         .Case("be32", true)
258         .Case("be16", true)
259         .Case("le64", true)
260         .Case("le32", true)
261         .Case("le16", true)
262         .Case("bswap16", true)
263         .Case("bswap32", true)
264         .Case("bswap64", true)
265         .Case("goto", true)
266         .Case("ll", true)
267         .Case("skb", true)
268         .Case("s", true)
269         .Case("atomic_fetch_add", true)
270         .Case("atomic_fetch_and", true)
271         .Case("atomic_fetch_or", true)
272         .Case("atomic_fetch_xor", true)
273         .Case("xchg_64", true)
274         .Case("xchg32_32", true)
275         .Case("cmpxchg_64", true)
276         .Case("cmpxchg32_32", true)
277         .Case("addr_space_cast", true)
278         .Case("load_acquire", true)
279         .Default(false);
280   }
281 };
282 } // end anonymous namespace.
283 
284 #define GET_REGISTER_MATCHER
285 #define GET_MATCHER_IMPLEMENTATION
286 #include "BPFGenAsmMatcher.inc"
287 
288 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
289 
290   if (Operands.size() == 4) {
291     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
292     // reg1 must be the same as reg2
293     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
294     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
295     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
296     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
297     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
298         && Op1.getToken() == "="
299         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
300             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
301             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
302             || Op2.getToken() == "le64")
303         && Op0.getReg() != Op3.getReg())
304       return true;
305   }
306 
307   return false;
308 }
309 
310 bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
311                                            OperandVector &Operands,
312                                            MCStreamer &Out, uint64_t &ErrorInfo,
313                                            bool MatchingInlineAsm) {
314   MCInst Inst;
315   SMLoc ErrorLoc;
316 
317   if (PreMatchCheck(Operands))
318     return Error(IDLoc, "additional inst constraint not met");
319 
320   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
321   default:
322     break;
323   case Match_Success:
324     Inst.setLoc(IDLoc);
325     Out.emitInstruction(Inst, getSTI());
326     return false;
327   case Match_MissingFeature:
328     return Error(IDLoc, "instruction use requires an option to be enabled");
329   case Match_MnemonicFail:
330     return Error(IDLoc, "unrecognized instruction mnemonic");
331   case Match_InvalidOperand:
332     ErrorLoc = IDLoc;
333 
334     if (ErrorInfo != ~0U) {
335       if (ErrorInfo >= Operands.size())
336         return Error(ErrorLoc, "too few operands for instruction");
337 
338       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
339 
340       if (ErrorLoc == SMLoc())
341         ErrorLoc = IDLoc;
342     }
343 
344     return Error(ErrorLoc, "invalid operand for instruction");
345   case Match_InvalidBrTarget:
346     return Error(Operands[ErrorInfo]->getStartLoc(),
347                  "operand is not an identifier or 16-bit signed integer");
348   case Match_InvalidSImm16:
349     return Error(Operands[ErrorInfo]->getStartLoc(),
350                  "operand is not a 16-bit signed integer");
351   case Match_InvalidTiedOperand:
352     return Error(Operands[ErrorInfo]->getStartLoc(),
353                  "operand is not the same as the dst register");
354   }
355 
356   llvm_unreachable("Unknown match type detected!");
357 }
358 
359 bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
360                                  SMLoc &EndLoc) {
361   if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
362     return Error(StartLoc, "invalid register name");
363   return false;
364 }
365 
366 ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
367                                            SMLoc &EndLoc) {
368   const AsmToken &Tok = getParser().getTok();
369   StartLoc = Tok.getLoc();
370   EndLoc = Tok.getEndLoc();
371   Reg = BPF::NoRegister;
372   StringRef Name = getLexer().getTok().getIdentifier();
373 
374   if (!MatchRegisterName(Name)) {
375     getParser().Lex(); // Eat identifier token.
376     return ParseStatus::Success;
377   }
378 
379   return ParseStatus::NoMatch;
380 }
381 
382 ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
383   SMLoc S = getLoc();
384 
385   if (getLexer().getKind() == AsmToken::Identifier) {
386     StringRef Name = getLexer().getTok().getIdentifier();
387 
388     if (BPFOperand::isValidIdInMiddle(Name)) {
389       getLexer().Lex();
390       Operands.push_back(BPFOperand::createToken(Name, S));
391       return ParseStatus::Success;
392     }
393 
394     return ParseStatus::NoMatch;
395   }
396 
397   switch (getLexer().getKind()) {
398   case AsmToken::Minus:
399   case AsmToken::Plus: {
400     if (getLexer().peekTok().is(AsmToken::Integer))
401       return ParseStatus::NoMatch;
402     [[fallthrough]];
403   }
404 
405   case AsmToken::Equal:
406   case AsmToken::Greater:
407   case AsmToken::Less:
408   case AsmToken::Pipe:
409   case AsmToken::Star:
410   case AsmToken::LParen:
411   case AsmToken::RParen:
412   case AsmToken::LBrac:
413   case AsmToken::RBrac:
414   case AsmToken::Slash:
415   case AsmToken::Amp:
416   case AsmToken::Percent:
417   case AsmToken::Caret: {
418     StringRef Name = getLexer().getTok().getString();
419     getLexer().Lex();
420     Operands.push_back(BPFOperand::createToken(Name, S));
421 
422     return ParseStatus::Success;
423   }
424 
425   case AsmToken::EqualEqual:
426   case AsmToken::ExclaimEqual:
427   case AsmToken::GreaterEqual:
428   case AsmToken::GreaterGreater:
429   case AsmToken::LessEqual:
430   case AsmToken::LessLess: {
431     Operands.push_back(BPFOperand::createToken(
432         getLexer().getTok().getString().substr(0, 1), S));
433     Operands.push_back(BPFOperand::createToken(
434         getLexer().getTok().getString().substr(1, 1), S));
435     getLexer().Lex();
436 
437     return ParseStatus::Success;
438   }
439 
440   default:
441     break;
442   }
443 
444   return ParseStatus::NoMatch;
445 }
446 
447 ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
448   SMLoc S = getLoc();
449   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
450 
451   switch (getLexer().getKind()) {
452   default:
453     return ParseStatus::NoMatch;
454   case AsmToken::Identifier:
455     StringRef Name = getLexer().getTok().getIdentifier();
456     MCRegister Reg = MatchRegisterName(Name);
457 
458     if (!Reg)
459       return ParseStatus::NoMatch;
460 
461     getLexer().Lex();
462     Operands.push_back(BPFOperand::createReg(Reg, S, E));
463   }
464   return ParseStatus::Success;
465 }
466 
467 ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
468   switch (getLexer().getKind()) {
469   default:
470     return ParseStatus::NoMatch;
471   case AsmToken::LParen:
472   case AsmToken::Minus:
473   case AsmToken::Plus:
474   case AsmToken::Integer:
475   case AsmToken::String:
476   case AsmToken::Identifier:
477     break;
478   }
479 
480   const MCExpr *IdVal;
481   SMLoc S = getLoc();
482 
483   if (getParser().parseExpression(IdVal))
484     return ParseStatus::Failure;
485 
486   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
487   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
488 
489   return ParseStatus::Success;
490 }
491 
492 /// Parse an BPF instruction which is in BPF verifier format.
493 bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
494                                     SMLoc NameLoc, OperandVector &Operands) {
495   // The first operand could be either register or actually an operator.
496   MCRegister Reg = MatchRegisterName(Name);
497 
498   if (Reg) {
499     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
500     Operands.push_back(BPFOperand::createReg(Reg, NameLoc, E));
501   } else if (BPFOperand::isValidIdAtStart(Name))
502     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
503   else
504     return Error(NameLoc, "invalid register/token name");
505 
506   while (!getLexer().is(AsmToken::EndOfStatement)) {
507     // Attempt to parse token as operator
508     if (parseOperandAsOperator(Operands).isSuccess())
509       continue;
510 
511     // Attempt to parse token as register
512     if (parseRegister(Operands).isSuccess())
513       continue;
514 
515     if (getLexer().is(AsmToken::Comma)) {
516       getLexer().Lex();
517       continue;
518     }
519 
520     // Attempt to parse token as an immediate
521     if (!parseImmediate(Operands).isSuccess()) {
522       SMLoc Loc = getLexer().getLoc();
523       return Error(Loc, "unexpected token");
524     }
525   }
526 
527   if (getLexer().isNot(AsmToken::EndOfStatement)) {
528     SMLoc Loc = getLexer().getLoc();
529 
530     getParser().eatToEndOfStatement();
531 
532     return Error(Loc, "unexpected token");
533   }
534 
535   // Consume the EndOfStatement.
536   getParser().Lex();
537   return false;
538 }
539 
540 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
541   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
542   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
543   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
544 }
545