xref: /freebsd/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/MCAsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/TargetRegistry.h"
24 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
31 class BPFAsmParser : public MCTargetAsmParser {
32 
33   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34 
35   bool PreMatchCheck(OperandVector &Operands);
36 
37   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38                                OperandVector &Operands, MCStreamer &Out,
39                                uint64_t &ErrorInfo,
40                                bool MatchingInlineAsm) override;
41 
42   bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
43   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
44                                SMLoc &EndLoc) override;
45 
46   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
47                         SMLoc NameLoc, OperandVector &Operands) override;
48 
49   // "=" is used as assignment operator for assembly statment, so can't be used
50   // for symbol assignment.
51   bool equalIsAsmAssignment() override { return false; }
52   // "*" is used for dereferencing memory that it will be the start of
53   // statement.
54   bool starIsStartOfStatement() override { return true; }
55 
56 #define GET_ASSEMBLER_HEADER
57 #include "BPFGenAsmMatcher.inc"
58 
59   ParseStatus parseImmediate(OperandVector &Operands);
60   ParseStatus parseRegister(OperandVector &Operands);
61   ParseStatus parseOperandAsOperator(OperandVector &Operands);
62 
63 public:
64   enum BPFMatchResultTy {
65     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
66 #define GET_OPERAND_DIAGNOSTIC_TYPES
67 #include "BPFGenAsmMatcher.inc"
68 #undef GET_OPERAND_DIAGNOSTIC_TYPES
69   };
70 
71   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
72                const MCInstrInfo &MII, const MCTargetOptions &Options)
73       : MCTargetAsmParser(Options, STI, MII) {
74     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
75   }
76 };
77 
78 /// BPFOperand - Instances of this class represent a parsed machine
79 /// instruction
80 struct BPFOperand : public MCParsedAsmOperand {
81 
82   enum KindTy {
83     Token,
84     Register,
85     Immediate,
86   } Kind;
87 
88   struct RegOp {
89     unsigned RegNum;
90   };
91 
92   struct ImmOp {
93     const MCExpr *Val;
94   };
95 
96   SMLoc StartLoc, EndLoc;
97   union {
98     StringRef Tok;
99     RegOp Reg;
100     ImmOp Imm;
101   };
102 
103   BPFOperand(KindTy K) : Kind(K) {}
104 
105 public:
106   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
107     Kind = o.Kind;
108     StartLoc = o.StartLoc;
109     EndLoc = o.EndLoc;
110 
111     switch (Kind) {
112     case Register:
113       Reg = o.Reg;
114       break;
115     case Immediate:
116       Imm = o.Imm;
117       break;
118     case Token:
119       Tok = o.Tok;
120       break;
121     }
122   }
123 
124   bool isToken() const override { return Kind == Token; }
125   bool isReg() const override { return Kind == Register; }
126   bool isImm() const override { return Kind == Immediate; }
127   bool isMem() const override { return false; }
128 
129   bool isConstantImm() const {
130     return isImm() && isa<MCConstantExpr>(getImm());
131   }
132 
133   int64_t getConstantImm() const {
134     const MCExpr *Val = getImm();
135     return static_cast<const MCConstantExpr *>(Val)->getValue();
136   }
137 
138   bool isSImm16() const {
139     return (isConstantImm() && isInt<16>(getConstantImm()));
140   }
141 
142   bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
143 
144   bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
145 
146   /// getStartLoc - Gets location of the first token of this operand
147   SMLoc getStartLoc() const override { return StartLoc; }
148   /// getEndLoc - Gets location of the last token of this operand
149   SMLoc getEndLoc() const override { return EndLoc; }
150 
151   MCRegister getReg() const override {
152     assert(Kind == Register && "Invalid type access!");
153     return Reg.RegNum;
154   }
155 
156   const MCExpr *getImm() const {
157     assert(Kind == Immediate && "Invalid type access!");
158     return Imm.Val;
159   }
160 
161   StringRef getToken() const {
162     assert(Kind == Token && "Invalid type access!");
163     return Tok;
164   }
165 
166   void print(raw_ostream &OS) const override {
167     switch (Kind) {
168     case Immediate:
169       OS << *getImm();
170       break;
171     case Register:
172       OS << "<register x";
173       OS << getReg() << ">";
174       break;
175     case Token:
176       OS << "'" << getToken() << "'";
177       break;
178     }
179   }
180 
181   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
182     assert(Expr && "Expr shouldn't be null!");
183 
184     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
185       Inst.addOperand(MCOperand::createImm(CE->getValue()));
186     else
187       Inst.addOperand(MCOperand::createExpr(Expr));
188   }
189 
190   // Used by the TableGen Code
191   void addRegOperands(MCInst &Inst, unsigned N) const {
192     assert(N == 1 && "Invalid number of operands!");
193     Inst.addOperand(MCOperand::createReg(getReg()));
194   }
195 
196   void addImmOperands(MCInst &Inst, unsigned N) const {
197     assert(N == 1 && "Invalid number of operands!");
198     addExpr(Inst, getImm());
199   }
200 
201   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
202     auto Op = std::make_unique<BPFOperand>(Token);
203     Op->Tok = Str;
204     Op->StartLoc = S;
205     Op->EndLoc = S;
206     return Op;
207   }
208 
209   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
210                                                SMLoc E) {
211     auto Op = std::make_unique<BPFOperand>(Register);
212     Op->Reg.RegNum = RegNo;
213     Op->StartLoc = S;
214     Op->EndLoc = E;
215     return Op;
216   }
217 
218   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
219                                                SMLoc E) {
220     auto Op = std::make_unique<BPFOperand>(Immediate);
221     Op->Imm.Val = Val;
222     Op->StartLoc = S;
223     Op->EndLoc = E;
224     return Op;
225   }
226 
227   // Identifiers that can be used at the start of a statment.
228   static bool isValidIdAtStart(StringRef Name) {
229     return StringSwitch<bool>(Name.lower())
230         .Case("if", true)
231         .Case("call", true)
232         .Case("callx", true)
233         .Case("goto", true)
234         .Case("gotol", true)
235         .Case("may_goto", true)
236         .Case("*", true)
237         .Case("exit", true)
238         .Case("lock", true)
239         .Case("ld_pseudo", true)
240         .Default(false);
241   }
242 
243   // Identifiers that can be used in the middle of a statment.
244   static bool isValidIdInMiddle(StringRef Name) {
245     return StringSwitch<bool>(Name.lower())
246         .Case("u64", true)
247         .Case("u32", true)
248         .Case("u16", true)
249         .Case("u8", true)
250         .Case("s32", true)
251         .Case("s16", true)
252         .Case("s8", true)
253         .Case("be64", true)
254         .Case("be32", true)
255         .Case("be16", true)
256         .Case("le64", true)
257         .Case("le32", true)
258         .Case("le16", true)
259         .Case("bswap16", true)
260         .Case("bswap32", true)
261         .Case("bswap64", true)
262         .Case("goto", true)
263         .Case("gotol", true)
264         .Case("ll", true)
265         .Case("skb", true)
266         .Case("s", true)
267         .Case("atomic_fetch_add", true)
268         .Case("atomic_fetch_and", true)
269         .Case("atomic_fetch_or", true)
270         .Case("atomic_fetch_xor", true)
271         .Case("xchg_64", true)
272         .Case("xchg32_32", true)
273         .Case("cmpxchg_64", true)
274         .Case("cmpxchg32_32", true)
275         .Case("addr_space_cast", true)
276         .Default(false);
277   }
278 };
279 } // end anonymous namespace.
280 
281 #define GET_REGISTER_MATCHER
282 #define GET_MATCHER_IMPLEMENTATION
283 #include "BPFGenAsmMatcher.inc"
284 
285 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
286 
287   if (Operands.size() == 4) {
288     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
289     // reg1 must be the same as reg2
290     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
291     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
292     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
293     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
294     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
295         && Op1.getToken() == "="
296         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
297             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
298             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
299             || Op2.getToken() == "le64")
300         && Op0.getReg() != Op3.getReg())
301       return true;
302   }
303 
304   return false;
305 }
306 
307 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
308                                            OperandVector &Operands,
309                                            MCStreamer &Out, uint64_t &ErrorInfo,
310                                            bool MatchingInlineAsm) {
311   MCInst Inst;
312   SMLoc ErrorLoc;
313 
314   if (PreMatchCheck(Operands))
315     return Error(IDLoc, "additional inst constraint not met");
316 
317   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
318   default:
319     break;
320   case Match_Success:
321     Inst.setLoc(IDLoc);
322     Out.emitInstruction(Inst, getSTI());
323     return false;
324   case Match_MissingFeature:
325     return Error(IDLoc, "instruction use requires an option to be enabled");
326   case Match_MnemonicFail:
327     return Error(IDLoc, "unrecognized instruction mnemonic");
328   case Match_InvalidOperand:
329     ErrorLoc = IDLoc;
330 
331     if (ErrorInfo != ~0U) {
332       if (ErrorInfo >= Operands.size())
333         return Error(ErrorLoc, "too few operands for instruction");
334 
335       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
336 
337       if (ErrorLoc == SMLoc())
338         ErrorLoc = IDLoc;
339     }
340 
341     return Error(ErrorLoc, "invalid operand for instruction");
342   case Match_InvalidBrTarget:
343     return Error(Operands[ErrorInfo]->getStartLoc(),
344                  "operand is not an identifier or 16-bit signed integer");
345   case Match_InvalidSImm16:
346     return Error(Operands[ErrorInfo]->getStartLoc(),
347                  "operand is not a 16-bit signed integer");
348   }
349 
350   llvm_unreachable("Unknown match type detected!");
351 }
352 
353 bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
354                                  SMLoc &EndLoc) {
355   if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
356     return Error(StartLoc, "invalid register name");
357   return false;
358 }
359 
360 ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
361                                            SMLoc &EndLoc) {
362   const AsmToken &Tok = getParser().getTok();
363   StartLoc = Tok.getLoc();
364   EndLoc = Tok.getEndLoc();
365   Reg = BPF::NoRegister;
366   StringRef Name = getLexer().getTok().getIdentifier();
367 
368   if (!MatchRegisterName(Name)) {
369     getParser().Lex(); // Eat identifier token.
370     return ParseStatus::Success;
371   }
372 
373   return ParseStatus::NoMatch;
374 }
375 
376 ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
377   SMLoc S = getLoc();
378 
379   if (getLexer().getKind() == AsmToken::Identifier) {
380     StringRef Name = getLexer().getTok().getIdentifier();
381 
382     if (BPFOperand::isValidIdInMiddle(Name)) {
383       getLexer().Lex();
384       Operands.push_back(BPFOperand::createToken(Name, S));
385       return ParseStatus::Success;
386     }
387 
388     return ParseStatus::NoMatch;
389   }
390 
391   switch (getLexer().getKind()) {
392   case AsmToken::Minus:
393   case AsmToken::Plus: {
394     if (getLexer().peekTok().is(AsmToken::Integer))
395       return ParseStatus::NoMatch;
396     [[fallthrough]];
397   }
398 
399   case AsmToken::Equal:
400   case AsmToken::Greater:
401   case AsmToken::Less:
402   case AsmToken::Pipe:
403   case AsmToken::Star:
404   case AsmToken::LParen:
405   case AsmToken::RParen:
406   case AsmToken::LBrac:
407   case AsmToken::RBrac:
408   case AsmToken::Slash:
409   case AsmToken::Amp:
410   case AsmToken::Percent:
411   case AsmToken::Caret: {
412     StringRef Name = getLexer().getTok().getString();
413     getLexer().Lex();
414     Operands.push_back(BPFOperand::createToken(Name, S));
415 
416     return ParseStatus::Success;
417   }
418 
419   case AsmToken::EqualEqual:
420   case AsmToken::ExclaimEqual:
421   case AsmToken::GreaterEqual:
422   case AsmToken::GreaterGreater:
423   case AsmToken::LessEqual:
424   case AsmToken::LessLess: {
425     Operands.push_back(BPFOperand::createToken(
426         getLexer().getTok().getString().substr(0, 1), S));
427     Operands.push_back(BPFOperand::createToken(
428         getLexer().getTok().getString().substr(1, 1), S));
429     getLexer().Lex();
430 
431     return ParseStatus::Success;
432   }
433 
434   default:
435     break;
436   }
437 
438   return ParseStatus::NoMatch;
439 }
440 
441 ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
442   SMLoc S = getLoc();
443   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
444 
445   switch (getLexer().getKind()) {
446   default:
447     return ParseStatus::NoMatch;
448   case AsmToken::Identifier:
449     StringRef Name = getLexer().getTok().getIdentifier();
450     unsigned RegNo = MatchRegisterName(Name);
451 
452     if (RegNo == 0)
453       return ParseStatus::NoMatch;
454 
455     getLexer().Lex();
456     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
457   }
458   return ParseStatus::Success;
459 }
460 
461 ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
462   switch (getLexer().getKind()) {
463   default:
464     return ParseStatus::NoMatch;
465   case AsmToken::LParen:
466   case AsmToken::Minus:
467   case AsmToken::Plus:
468   case AsmToken::Integer:
469   case AsmToken::String:
470   case AsmToken::Identifier:
471     break;
472   }
473 
474   const MCExpr *IdVal;
475   SMLoc S = getLoc();
476 
477   if (getParser().parseExpression(IdVal))
478     return ParseStatus::Failure;
479 
480   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
481   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
482 
483   return ParseStatus::Success;
484 }
485 
486 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
487 /// format.
488 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
489                                     SMLoc NameLoc, OperandVector &Operands) {
490   // The first operand could be either register or actually an operator.
491   unsigned RegNo = MatchRegisterName(Name);
492 
493   if (RegNo != 0) {
494     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
495     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
496   } else if (BPFOperand::isValidIdAtStart (Name))
497     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
498   else
499     return Error(NameLoc, "invalid register/token name");
500 
501   while (!getLexer().is(AsmToken::EndOfStatement)) {
502     // Attempt to parse token as operator
503     if (parseOperandAsOperator(Operands).isSuccess())
504       continue;
505 
506     // Attempt to parse token as register
507     if (parseRegister(Operands).isSuccess())
508       continue;
509 
510     if (getLexer().is(AsmToken::Comma)) {
511       getLexer().Lex();
512       continue;
513     }
514 
515     // Attempt to parse token as an immediate
516     if (!parseImmediate(Operands).isSuccess()) {
517       SMLoc Loc = getLexer().getLoc();
518       return Error(Loc, "unexpected token");
519     }
520   }
521 
522   if (getLexer().isNot(AsmToken::EndOfStatement)) {
523     SMLoc Loc = getLexer().getLoc();
524 
525     getParser().eatToEndOfStatement();
526 
527     return Error(Loc, "unexpected token");
528   }
529 
530   // Consume the EndOfStatement.
531   getParser().Lex();
532   return false;
533 }
534 
535 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
536   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
537   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
538   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
539 }
540