xref: /freebsd/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp (revision 43a5ec4eb41567cc92586503212743d89686d78f)
1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCParser/MCAsmLexer.h"
17 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
18 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCStreamer.h"
21 #include "llvm/MC/MCSubtargetInfo.h"
22 #include "llvm/Support/Casting.h"
23 #include "llvm/Support/TargetRegistry.h"
24 
25 using namespace llvm;
26 
27 namespace {
28 struct BPFOperand;
29 
30 class BPFAsmParser : public MCTargetAsmParser {
31 
32   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
33 
34   bool PreMatchCheck(OperandVector &Operands);
35 
36   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
37                                OperandVector &Operands, MCStreamer &Out,
38                                uint64_t &ErrorInfo,
39                                bool MatchingInlineAsm) override;
40 
41   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
42   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
43                                         SMLoc &EndLoc) override;
44 
45   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
46                         SMLoc NameLoc, OperandVector &Operands) override;
47 
48   bool ParseDirective(AsmToken DirectiveID) override;
49 
50   // "=" is used as assignment operator for assembly statment, so can't be used
51   // for symbol assignment.
52   bool equalIsAsmAssignment() override { return false; }
53   // "*" is used for dereferencing memory that it will be the start of
54   // statement.
55   bool starIsStartOfStatement() override { return true; }
56 
57 #define GET_ASSEMBLER_HEADER
58 #include "BPFGenAsmMatcher.inc"
59 
60   OperandMatchResultTy parseImmediate(OperandVector &Operands);
61   OperandMatchResultTy parseRegister(OperandVector &Operands);
62   OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
63 
64 public:
65   enum BPFMatchResultTy {
66     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
67 #define GET_OPERAND_DIAGNOSTIC_TYPES
68 #include "BPFGenAsmMatcher.inc"
69 #undef GET_OPERAND_DIAGNOSTIC_TYPES
70   };
71 
72   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
73                const MCInstrInfo &MII, const MCTargetOptions &Options)
74       : MCTargetAsmParser(Options, STI, MII) {
75     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
76   }
77 };
78 
79 /// BPFOperand - Instances of this class represent a parsed machine
80 /// instruction
81 struct BPFOperand : public MCParsedAsmOperand {
82 
83   enum KindTy {
84     Token,
85     Register,
86     Immediate,
87   } Kind;
88 
89   struct RegOp {
90     unsigned RegNum;
91   };
92 
93   struct ImmOp {
94     const MCExpr *Val;
95   };
96 
97   SMLoc StartLoc, EndLoc;
98   union {
99     StringRef Tok;
100     RegOp Reg;
101     ImmOp Imm;
102   };
103 
104   BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
105 
106 public:
107   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
108     Kind = o.Kind;
109     StartLoc = o.StartLoc;
110     EndLoc = o.EndLoc;
111 
112     switch (Kind) {
113     case Register:
114       Reg = o.Reg;
115       break;
116     case Immediate:
117       Imm = o.Imm;
118       break;
119     case Token:
120       Tok = o.Tok;
121       break;
122     }
123   }
124 
125   bool isToken() const override { return Kind == Token; }
126   bool isReg() const override { return Kind == Register; }
127   bool isImm() const override { return Kind == Immediate; }
128   bool isMem() const override { return false; }
129 
130   bool isConstantImm() const {
131     return isImm() && isa<MCConstantExpr>(getImm());
132   }
133 
134   int64_t getConstantImm() const {
135     const MCExpr *Val = getImm();
136     return static_cast<const MCConstantExpr *>(Val)->getValue();
137   }
138 
139   bool isSImm12() const {
140     return (isConstantImm() && isInt<12>(getConstantImm()));
141   }
142 
143   /// getStartLoc - Gets location of the first token of this operand
144   SMLoc getStartLoc() const override { return StartLoc; }
145   /// getEndLoc - Gets location of the last token of this operand
146   SMLoc getEndLoc() const override { return EndLoc; }
147 
148   unsigned getReg() const override {
149     assert(Kind == Register && "Invalid type access!");
150     return Reg.RegNum;
151   }
152 
153   const MCExpr *getImm() const {
154     assert(Kind == Immediate && "Invalid type access!");
155     return Imm.Val;
156   }
157 
158   StringRef getToken() const {
159     assert(Kind == Token && "Invalid type access!");
160     return Tok;
161   }
162 
163   void print(raw_ostream &OS) const override {
164     switch (Kind) {
165     case Immediate:
166       OS << *getImm();
167       break;
168     case Register:
169       OS << "<register x";
170       OS << getReg() << ">";
171       break;
172     case Token:
173       OS << "'" << getToken() << "'";
174       break;
175     }
176   }
177 
178   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
179     assert(Expr && "Expr shouldn't be null!");
180 
181     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
182       Inst.addOperand(MCOperand::createImm(CE->getValue()));
183     else
184       Inst.addOperand(MCOperand::createExpr(Expr));
185   }
186 
187   // Used by the TableGen Code
188   void addRegOperands(MCInst &Inst, unsigned N) const {
189     assert(N == 1 && "Invalid number of operands!");
190     Inst.addOperand(MCOperand::createReg(getReg()));
191   }
192 
193   void addImmOperands(MCInst &Inst, unsigned N) const {
194     assert(N == 1 && "Invalid number of operands!");
195     addExpr(Inst, getImm());
196   }
197 
198   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
199     auto Op = std::make_unique<BPFOperand>(Token);
200     Op->Tok = Str;
201     Op->StartLoc = S;
202     Op->EndLoc = S;
203     return Op;
204   }
205 
206   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
207                                                SMLoc E) {
208     auto Op = std::make_unique<BPFOperand>(Register);
209     Op->Reg.RegNum = RegNo;
210     Op->StartLoc = S;
211     Op->EndLoc = E;
212     return Op;
213   }
214 
215   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
216                                                SMLoc E) {
217     auto Op = std::make_unique<BPFOperand>(Immediate);
218     Op->Imm.Val = Val;
219     Op->StartLoc = S;
220     Op->EndLoc = E;
221     return Op;
222   }
223 
224   // Identifiers that can be used at the start of a statment.
225   static bool isValidIdAtStart(StringRef Name) {
226     return StringSwitch<bool>(Name.lower())
227         .Case("if", true)
228         .Case("call", true)
229         .Case("goto", true)
230         .Case("*", true)
231         .Case("exit", true)
232         .Case("lock", true)
233         .Case("ld_pseudo", true)
234         .Default(false);
235   }
236 
237   // Identifiers that can be used in the middle of a statment.
238   static bool isValidIdInMiddle(StringRef Name) {
239     return StringSwitch<bool>(Name.lower())
240         .Case("u64", true)
241         .Case("u32", true)
242         .Case("u16", true)
243         .Case("u8", true)
244         .Case("be64", true)
245         .Case("be32", true)
246         .Case("be16", true)
247         .Case("le64", true)
248         .Case("le32", true)
249         .Case("le16", true)
250         .Case("goto", true)
251         .Case("ll", true)
252         .Case("skb", true)
253         .Case("s", true)
254         .Default(false);
255   }
256 };
257 } // end anonymous namespace.
258 
259 #define GET_REGISTER_MATCHER
260 #define GET_MATCHER_IMPLEMENTATION
261 #include "BPFGenAsmMatcher.inc"
262 
263 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
264 
265   if (Operands.size() == 4) {
266     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
267     // reg1 must be the same as reg2
268     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
269     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
270     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
271     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
272     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
273         && Op1.getToken() == "="
274         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
275             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
276             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
277             || Op2.getToken() == "le64")
278         && Op0.getReg() != Op3.getReg())
279       return true;
280   }
281 
282   return false;
283 }
284 
285 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
286                                            OperandVector &Operands,
287                                            MCStreamer &Out, uint64_t &ErrorInfo,
288                                            bool MatchingInlineAsm) {
289   MCInst Inst;
290   SMLoc ErrorLoc;
291 
292   if (PreMatchCheck(Operands))
293     return Error(IDLoc, "additional inst constraint not met");
294 
295   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
296   default:
297     break;
298   case Match_Success:
299     Inst.setLoc(IDLoc);
300     Out.emitInstruction(Inst, getSTI());
301     return false;
302   case Match_MissingFeature:
303     return Error(IDLoc, "instruction use requires an option to be enabled");
304   case Match_MnemonicFail:
305     return Error(IDLoc, "unrecognized instruction mnemonic");
306   case Match_InvalidOperand:
307     ErrorLoc = IDLoc;
308 
309     if (ErrorInfo != ~0U) {
310       if (ErrorInfo >= Operands.size())
311         return Error(ErrorLoc, "too few operands for instruction");
312 
313       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
314 
315       if (ErrorLoc == SMLoc())
316         ErrorLoc = IDLoc;
317     }
318 
319     return Error(ErrorLoc, "invalid operand for instruction");
320   }
321 
322   llvm_unreachable("Unknown match type detected!");
323 }
324 
325 bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
326                                  SMLoc &EndLoc) {
327   if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
328     return Error(StartLoc, "invalid register name");
329   return false;
330 }
331 
332 OperandMatchResultTy BPFAsmParser::tryParseRegister(unsigned &RegNo,
333                                                     SMLoc &StartLoc,
334                                                     SMLoc &EndLoc) {
335   const AsmToken &Tok = getParser().getTok();
336   StartLoc = Tok.getLoc();
337   EndLoc = Tok.getEndLoc();
338   RegNo = 0;
339   StringRef Name = getLexer().getTok().getIdentifier();
340 
341   if (!MatchRegisterName(Name)) {
342     getParser().Lex(); // Eat identifier token.
343     return MatchOperand_Success;
344   }
345 
346   return MatchOperand_NoMatch;
347 }
348 
349 OperandMatchResultTy
350 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
351   SMLoc S = getLoc();
352 
353   if (getLexer().getKind() == AsmToken::Identifier) {
354     StringRef Name = getLexer().getTok().getIdentifier();
355 
356     if (BPFOperand::isValidIdInMiddle(Name)) {
357       getLexer().Lex();
358       Operands.push_back(BPFOperand::createToken(Name, S));
359       return MatchOperand_Success;
360     }
361 
362     return MatchOperand_NoMatch;
363   }
364 
365   switch (getLexer().getKind()) {
366   case AsmToken::Minus:
367   case AsmToken::Plus: {
368     if (getLexer().peekTok().is(AsmToken::Integer))
369       return MatchOperand_NoMatch;
370     LLVM_FALLTHROUGH;
371   }
372 
373   case AsmToken::Equal:
374   case AsmToken::Greater:
375   case AsmToken::Less:
376   case AsmToken::Pipe:
377   case AsmToken::Star:
378   case AsmToken::LParen:
379   case AsmToken::RParen:
380   case AsmToken::LBrac:
381   case AsmToken::RBrac:
382   case AsmToken::Slash:
383   case AsmToken::Amp:
384   case AsmToken::Percent:
385   case AsmToken::Caret: {
386     StringRef Name = getLexer().getTok().getString();
387     getLexer().Lex();
388     Operands.push_back(BPFOperand::createToken(Name, S));
389 
390     return MatchOperand_Success;
391   }
392 
393   case AsmToken::EqualEqual:
394   case AsmToken::ExclaimEqual:
395   case AsmToken::GreaterEqual:
396   case AsmToken::GreaterGreater:
397   case AsmToken::LessEqual:
398   case AsmToken::LessLess: {
399     Operands.push_back(BPFOperand::createToken(
400         getLexer().getTok().getString().substr(0, 1), S));
401     Operands.push_back(BPFOperand::createToken(
402         getLexer().getTok().getString().substr(1, 1), S));
403     getLexer().Lex();
404 
405     return MatchOperand_Success;
406   }
407 
408   default:
409     break;
410   }
411 
412   return MatchOperand_NoMatch;
413 }
414 
415 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
416   SMLoc S = getLoc();
417   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
418 
419   switch (getLexer().getKind()) {
420   default:
421     return MatchOperand_NoMatch;
422   case AsmToken::Identifier:
423     StringRef Name = getLexer().getTok().getIdentifier();
424     unsigned RegNo = MatchRegisterName(Name);
425 
426     if (RegNo == 0)
427       return MatchOperand_NoMatch;
428 
429     getLexer().Lex();
430     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
431   }
432   return MatchOperand_Success;
433 }
434 
435 OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
436   switch (getLexer().getKind()) {
437   default:
438     return MatchOperand_NoMatch;
439   case AsmToken::LParen:
440   case AsmToken::Minus:
441   case AsmToken::Plus:
442   case AsmToken::Integer:
443   case AsmToken::String:
444   case AsmToken::Identifier:
445     break;
446   }
447 
448   const MCExpr *IdVal;
449   SMLoc S = getLoc();
450 
451   if (getParser().parseExpression(IdVal))
452     return MatchOperand_ParseFail;
453 
454   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
455   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
456 
457   return MatchOperand_Success;
458 }
459 
460 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
461 /// format.
462 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
463                                     SMLoc NameLoc, OperandVector &Operands) {
464   // The first operand could be either register or actually an operator.
465   unsigned RegNo = MatchRegisterName(Name);
466 
467   if (RegNo != 0) {
468     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
469     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
470   } else if (BPFOperand::isValidIdAtStart (Name))
471     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
472   else
473     return Error(NameLoc, "invalid register/token name");
474 
475   while (!getLexer().is(AsmToken::EndOfStatement)) {
476     // Attempt to parse token as operator
477     if (parseOperandAsOperator(Operands) == MatchOperand_Success)
478       continue;
479 
480     // Attempt to parse token as register
481     if (parseRegister(Operands) == MatchOperand_Success)
482       continue;
483 
484     // Attempt to parse token as an immediate
485     if (parseImmediate(Operands) != MatchOperand_Success) {
486       SMLoc Loc = getLexer().getLoc();
487       return Error(Loc, "unexpected token");
488     }
489   }
490 
491   if (getLexer().isNot(AsmToken::EndOfStatement)) {
492     SMLoc Loc = getLexer().getLoc();
493 
494     getParser().eatToEndOfStatement();
495 
496     return Error(Loc, "unexpected token");
497   }
498 
499   // Consume the EndOfStatement.
500   getParser().Lex();
501   return false;
502 }
503 
504 bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
505 
506 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
507   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
508   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
509   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
510 }
511