xref: /freebsd/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp (revision ac77b2621508c6a50ab01d07fe8d43795d908f05)
1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/MCAsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/TargetRegistry.h"
24 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
31 class BPFAsmParser : public MCTargetAsmParser {
32 
33   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34 
35   bool PreMatchCheck(OperandVector &Operands);
36 
37   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38                                OperandVector &Operands, MCStreamer &Out,
39                                uint64_t &ErrorInfo,
40                                bool MatchingInlineAsm) override;
41 
42   bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
43   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
44                                SMLoc &EndLoc) override;
45 
46   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
47                         SMLoc NameLoc, OperandVector &Operands) override;
48 
49   // "=" is used as assignment operator for assembly statment, so can't be used
50   // for symbol assignment.
51   bool equalIsAsmAssignment() override { return false; }
52   // "*" is used for dereferencing memory that it will be the start of
53   // statement.
54   bool starIsStartOfStatement() override { return true; }
55 
56 #define GET_ASSEMBLER_HEADER
57 #include "BPFGenAsmMatcher.inc"
58 
59   ParseStatus parseImmediate(OperandVector &Operands);
60   ParseStatus parseRegister(OperandVector &Operands);
61   ParseStatus parseOperandAsOperator(OperandVector &Operands);
62 
63 public:
64   enum BPFMatchResultTy {
65     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
66 #define GET_OPERAND_DIAGNOSTIC_TYPES
67 #include "BPFGenAsmMatcher.inc"
68 #undef GET_OPERAND_DIAGNOSTIC_TYPES
69   };
70 
71   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
72                const MCInstrInfo &MII, const MCTargetOptions &Options)
73       : MCTargetAsmParser(Options, STI, MII) {
74     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
75   }
76 };
77 
78 /// BPFOperand - Instances of this class represent a parsed machine
79 /// instruction
80 struct BPFOperand : public MCParsedAsmOperand {
81 
82   enum KindTy {
83     Token,
84     Register,
85     Immediate,
86   } Kind;
87 
88   struct RegOp {
89     unsigned RegNum;
90   };
91 
92   struct ImmOp {
93     const MCExpr *Val;
94   };
95 
96   SMLoc StartLoc, EndLoc;
97   union {
98     StringRef Tok;
99     RegOp Reg;
100     ImmOp Imm;
101   };
102 
103   BPFOperand(KindTy K) : Kind(K) {}
104 
105 public:
106   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
107     Kind = o.Kind;
108     StartLoc = o.StartLoc;
109     EndLoc = o.EndLoc;
110 
111     switch (Kind) {
112     case Register:
113       Reg = o.Reg;
114       break;
115     case Immediate:
116       Imm = o.Imm;
117       break;
118     case Token:
119       Tok = o.Tok;
120       break;
121     }
122   }
123 
124   bool isToken() const override { return Kind == Token; }
125   bool isReg() const override { return Kind == Register; }
126   bool isImm() const override { return Kind == Immediate; }
127   bool isMem() const override { return false; }
128 
129   bool isConstantImm() const {
130     return isImm() && isa<MCConstantExpr>(getImm());
131   }
132 
133   int64_t getConstantImm() const {
134     const MCExpr *Val = getImm();
135     return static_cast<const MCConstantExpr *>(Val)->getValue();
136   }
137 
138   bool isSImm16() const {
139     return (isConstantImm() && isInt<16>(getConstantImm()));
140   }
141 
142   bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
143 
144   bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
145 
146   /// getStartLoc - Gets location of the first token of this operand
147   SMLoc getStartLoc() const override { return StartLoc; }
148   /// getEndLoc - Gets location of the last token of this operand
149   SMLoc getEndLoc() const override { return EndLoc; }
150 
151   unsigned getReg() const override {
152     assert(Kind == Register && "Invalid type access!");
153     return Reg.RegNum;
154   }
155 
156   const MCExpr *getImm() const {
157     assert(Kind == Immediate && "Invalid type access!");
158     return Imm.Val;
159   }
160 
161   StringRef getToken() const {
162     assert(Kind == Token && "Invalid type access!");
163     return Tok;
164   }
165 
166   void print(raw_ostream &OS) const override {
167     switch (Kind) {
168     case Immediate:
169       OS << *getImm();
170       break;
171     case Register:
172       OS << "<register x";
173       OS << getReg() << ">";
174       break;
175     case Token:
176       OS << "'" << getToken() << "'";
177       break;
178     }
179   }
180 
181   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
182     assert(Expr && "Expr shouldn't be null!");
183 
184     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
185       Inst.addOperand(MCOperand::createImm(CE->getValue()));
186     else
187       Inst.addOperand(MCOperand::createExpr(Expr));
188   }
189 
190   // Used by the TableGen Code
191   void addRegOperands(MCInst &Inst, unsigned N) const {
192     assert(N == 1 && "Invalid number of operands!");
193     Inst.addOperand(MCOperand::createReg(getReg()));
194   }
195 
196   void addImmOperands(MCInst &Inst, unsigned N) const {
197     assert(N == 1 && "Invalid number of operands!");
198     addExpr(Inst, getImm());
199   }
200 
201   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
202     auto Op = std::make_unique<BPFOperand>(Token);
203     Op->Tok = Str;
204     Op->StartLoc = S;
205     Op->EndLoc = S;
206     return Op;
207   }
208 
209   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
210                                                SMLoc E) {
211     auto Op = std::make_unique<BPFOperand>(Register);
212     Op->Reg.RegNum = RegNo;
213     Op->StartLoc = S;
214     Op->EndLoc = E;
215     return Op;
216   }
217 
218   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
219                                                SMLoc E) {
220     auto Op = std::make_unique<BPFOperand>(Immediate);
221     Op->Imm.Val = Val;
222     Op->StartLoc = S;
223     Op->EndLoc = E;
224     return Op;
225   }
226 
227   // Identifiers that can be used at the start of a statment.
228   static bool isValidIdAtStart(StringRef Name) {
229     return StringSwitch<bool>(Name.lower())
230         .Case("if", true)
231         .Case("call", true)
232         .Case("goto", true)
233         .Case("gotol", true)
234         .Case("*", true)
235         .Case("exit", true)
236         .Case("lock", true)
237         .Case("ld_pseudo", true)
238         .Default(false);
239   }
240 
241   // Identifiers that can be used in the middle of a statment.
242   static bool isValidIdInMiddle(StringRef Name) {
243     return StringSwitch<bool>(Name.lower())
244         .Case("u64", true)
245         .Case("u32", true)
246         .Case("u16", true)
247         .Case("u8", true)
248         .Case("s32", true)
249         .Case("s16", true)
250         .Case("s8", true)
251         .Case("be64", true)
252         .Case("be32", true)
253         .Case("be16", true)
254         .Case("le64", true)
255         .Case("le32", true)
256         .Case("le16", true)
257         .Case("bswap16", true)
258         .Case("bswap32", true)
259         .Case("bswap64", true)
260         .Case("goto", true)
261         .Case("gotol", true)
262         .Case("ll", true)
263         .Case("skb", true)
264         .Case("s", true)
265         .Case("atomic_fetch_add", true)
266         .Case("atomic_fetch_and", true)
267         .Case("atomic_fetch_or", true)
268         .Case("atomic_fetch_xor", true)
269         .Case("xchg_64", true)
270         .Case("xchg32_32", true)
271         .Case("cmpxchg_64", true)
272         .Case("cmpxchg32_32", true)
273         .Default(false);
274   }
275 };
276 } // end anonymous namespace.
277 
278 #define GET_REGISTER_MATCHER
279 #define GET_MATCHER_IMPLEMENTATION
280 #include "BPFGenAsmMatcher.inc"
281 
282 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
283 
284   if (Operands.size() == 4) {
285     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
286     // reg1 must be the same as reg2
287     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
288     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
289     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
290     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
291     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
292         && Op1.getToken() == "="
293         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
294             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
295             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
296             || Op2.getToken() == "le64")
297         && Op0.getReg() != Op3.getReg())
298       return true;
299   }
300 
301   return false;
302 }
303 
304 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
305                                            OperandVector &Operands,
306                                            MCStreamer &Out, uint64_t &ErrorInfo,
307                                            bool MatchingInlineAsm) {
308   MCInst Inst;
309   SMLoc ErrorLoc;
310 
311   if (PreMatchCheck(Operands))
312     return Error(IDLoc, "additional inst constraint not met");
313 
314   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
315   default:
316     break;
317   case Match_Success:
318     Inst.setLoc(IDLoc);
319     Out.emitInstruction(Inst, getSTI());
320     return false;
321   case Match_MissingFeature:
322     return Error(IDLoc, "instruction use requires an option to be enabled");
323   case Match_MnemonicFail:
324     return Error(IDLoc, "unrecognized instruction mnemonic");
325   case Match_InvalidOperand:
326     ErrorLoc = IDLoc;
327 
328     if (ErrorInfo != ~0U) {
329       if (ErrorInfo >= Operands.size())
330         return Error(ErrorLoc, "too few operands for instruction");
331 
332       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
333 
334       if (ErrorLoc == SMLoc())
335         ErrorLoc = IDLoc;
336     }
337 
338     return Error(ErrorLoc, "invalid operand for instruction");
339   case Match_InvalidBrTarget:
340     return Error(Operands[ErrorInfo]->getStartLoc(),
341                  "operand is not an identifier or 16-bit signed integer");
342   case Match_InvalidSImm16:
343     return Error(Operands[ErrorInfo]->getStartLoc(),
344                  "operand is not a 16-bit signed integer");
345   }
346 
347   llvm_unreachable("Unknown match type detected!");
348 }
349 
350 bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
351                                  SMLoc &EndLoc) {
352   if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
353     return Error(StartLoc, "invalid register name");
354   return false;
355 }
356 
357 ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
358                                            SMLoc &EndLoc) {
359   const AsmToken &Tok = getParser().getTok();
360   StartLoc = Tok.getLoc();
361   EndLoc = Tok.getEndLoc();
362   Reg = BPF::NoRegister;
363   StringRef Name = getLexer().getTok().getIdentifier();
364 
365   if (!MatchRegisterName(Name)) {
366     getParser().Lex(); // Eat identifier token.
367     return ParseStatus::Success;
368   }
369 
370   return ParseStatus::NoMatch;
371 }
372 
373 ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
374   SMLoc S = getLoc();
375 
376   if (getLexer().getKind() == AsmToken::Identifier) {
377     StringRef Name = getLexer().getTok().getIdentifier();
378 
379     if (BPFOperand::isValidIdInMiddle(Name)) {
380       getLexer().Lex();
381       Operands.push_back(BPFOperand::createToken(Name, S));
382       return ParseStatus::Success;
383     }
384 
385     return ParseStatus::NoMatch;
386   }
387 
388   switch (getLexer().getKind()) {
389   case AsmToken::Minus:
390   case AsmToken::Plus: {
391     if (getLexer().peekTok().is(AsmToken::Integer))
392       return ParseStatus::NoMatch;
393     [[fallthrough]];
394   }
395 
396   case AsmToken::Equal:
397   case AsmToken::Greater:
398   case AsmToken::Less:
399   case AsmToken::Pipe:
400   case AsmToken::Star:
401   case AsmToken::LParen:
402   case AsmToken::RParen:
403   case AsmToken::LBrac:
404   case AsmToken::RBrac:
405   case AsmToken::Slash:
406   case AsmToken::Amp:
407   case AsmToken::Percent:
408   case AsmToken::Caret: {
409     StringRef Name = getLexer().getTok().getString();
410     getLexer().Lex();
411     Operands.push_back(BPFOperand::createToken(Name, S));
412 
413     return ParseStatus::Success;
414   }
415 
416   case AsmToken::EqualEqual:
417   case AsmToken::ExclaimEqual:
418   case AsmToken::GreaterEqual:
419   case AsmToken::GreaterGreater:
420   case AsmToken::LessEqual:
421   case AsmToken::LessLess: {
422     Operands.push_back(BPFOperand::createToken(
423         getLexer().getTok().getString().substr(0, 1), S));
424     Operands.push_back(BPFOperand::createToken(
425         getLexer().getTok().getString().substr(1, 1), S));
426     getLexer().Lex();
427 
428     return ParseStatus::Success;
429   }
430 
431   default:
432     break;
433   }
434 
435   return ParseStatus::NoMatch;
436 }
437 
438 ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
439   SMLoc S = getLoc();
440   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
441 
442   switch (getLexer().getKind()) {
443   default:
444     return ParseStatus::NoMatch;
445   case AsmToken::Identifier:
446     StringRef Name = getLexer().getTok().getIdentifier();
447     unsigned RegNo = MatchRegisterName(Name);
448 
449     if (RegNo == 0)
450       return ParseStatus::NoMatch;
451 
452     getLexer().Lex();
453     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
454   }
455   return ParseStatus::Success;
456 }
457 
458 ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
459   switch (getLexer().getKind()) {
460   default:
461     return ParseStatus::NoMatch;
462   case AsmToken::LParen:
463   case AsmToken::Minus:
464   case AsmToken::Plus:
465   case AsmToken::Integer:
466   case AsmToken::String:
467   case AsmToken::Identifier:
468     break;
469   }
470 
471   const MCExpr *IdVal;
472   SMLoc S = getLoc();
473 
474   if (getParser().parseExpression(IdVal))
475     return ParseStatus::Failure;
476 
477   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
478   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
479 
480   return ParseStatus::Success;
481 }
482 
483 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
484 /// format.
485 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
486                                     SMLoc NameLoc, OperandVector &Operands) {
487   // The first operand could be either register or actually an operator.
488   unsigned RegNo = MatchRegisterName(Name);
489 
490   if (RegNo != 0) {
491     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
492     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
493   } else if (BPFOperand::isValidIdAtStart (Name))
494     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
495   else
496     return Error(NameLoc, "invalid register/token name");
497 
498   while (!getLexer().is(AsmToken::EndOfStatement)) {
499     // Attempt to parse token as operator
500     if (parseOperandAsOperator(Operands).isSuccess())
501       continue;
502 
503     // Attempt to parse token as register
504     if (parseRegister(Operands).isSuccess())
505       continue;
506 
507     if (getLexer().is(AsmToken::Comma)) {
508       getLexer().Lex();
509       continue;
510     }
511 
512     // Attempt to parse token as an immediate
513     if (!parseImmediate(Operands).isSuccess()) {
514       SMLoc Loc = getLexer().getLoc();
515       return Error(Loc, "unexpected token");
516     }
517   }
518 
519   if (getLexer().isNot(AsmToken::EndOfStatement)) {
520     SMLoc Loc = getLexer().getLoc();
521 
522     getParser().eatToEndOfStatement();
523 
524     return Error(Loc, "unexpected token");
525   }
526 
527   // Consume the EndOfStatement.
528   getParser().Lex();
529   return false;
530 }
531 
532 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
533   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
534   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
535   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
536 }
537