LLVM 22.0.0git
BPFAsmParser.cpp
Go to the documentation of this file.
1//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
13#include "llvm/MC/MCContext.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCInstrInfo.h"
20#include "llvm/MC/MCStreamer.h"
25
26using namespace llvm;
27
28namespace {
29struct BPFOperand;
30
31class BPFAsmParser : public MCTargetAsmParser {
32
33 SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34
35 bool PreMatchCheck(OperandVector &Operands);
36
37 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38 OperandVector &Operands, MCStreamer &Out,
39 uint64_t &ErrorInfo,
40 bool MatchingInlineAsm) override;
41
42 bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
43 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
44 SMLoc &EndLoc) override;
45
46 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
47 SMLoc NameLoc, OperandVector &Operands) override;
48
49 // "=" is used as assignment operator for assembly statment, so can't be used
50 // for symbol assignment.
51 bool equalIsAsmAssignment() override { return false; }
52 // "*" is used for dereferencing memory that it will be the start of
53 // statement.
54 bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override {
55 return Token == AsmToken::Star;
56 }
57
58#define GET_ASSEMBLER_HEADER
59#include "BPFGenAsmMatcher.inc"
60
62 ParseStatus parseRegister(OperandVector &Operands);
63 ParseStatus parseOperandAsOperator(OperandVector &Operands);
64
65public:
66 enum BPFMatchResultTy {
67 Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
68#define GET_OPERAND_DIAGNOSTIC_TYPES
69#include "BPFGenAsmMatcher.inc"
70#undef GET_OPERAND_DIAGNOSTIC_TYPES
71 };
72
73 BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
74 const MCInstrInfo &MII, const MCTargetOptions &Options)
75 : MCTargetAsmParser(Options, STI, MII) {
76 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
77 }
78};
79
80/// BPFOperand - Instances of this class represent a parsed machine
81/// instruction
82struct BPFOperand : public MCParsedAsmOperand {
83
84 enum KindTy {
85 Token,
86 Register,
87 Immediate,
88 } Kind;
89
90 struct RegOp {
91 MCRegister RegNum;
92 };
93
94 struct ImmOp {
95 const MCExpr *Val;
96 };
97
98 SMLoc StartLoc, EndLoc;
99 union {
100 StringRef Tok;
101 RegOp Reg;
102 ImmOp Imm;
103 };
104
105 BPFOperand(KindTy K) : Kind(K) {}
106
107public:
108 BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
109 Kind = o.Kind;
110 StartLoc = o.StartLoc;
111 EndLoc = o.EndLoc;
112
113 switch (Kind) {
114 case Register:
115 Reg = o.Reg;
116 break;
117 case Immediate:
118 Imm = o.Imm;
119 break;
120 case Token:
121 Tok = o.Tok;
122 break;
123 }
124 }
125
126 bool isToken() const override { return Kind == Token; }
127 bool isReg() const override { return Kind == Register; }
128 bool isImm() const override { return Kind == Immediate; }
129 bool isMem() const override { return false; }
130
131 bool isConstantImm() const {
132 return isImm() && isa<MCConstantExpr>(getImm());
133 }
134
135 int64_t getConstantImm() const {
136 const MCExpr *Val = getImm();
137 return static_cast<const MCConstantExpr *>(Val)->getValue();
138 }
139
140 bool isSImm16() const {
141 return (isConstantImm() && isInt<16>(getConstantImm()));
142 }
143
144 bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
145
146 bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
147
148 /// getStartLoc - Gets location of the first token of this operand
149 SMLoc getStartLoc() const override { return StartLoc; }
150 /// getEndLoc - Gets location of the last token of this operand
151 SMLoc getEndLoc() const override { return EndLoc; }
152
153 MCRegister getReg() const override {
154 assert(Kind == Register && "Invalid type access!");
155 return Reg.RegNum;
156 }
157
158 const MCExpr *getImm() const {
159 assert(Kind == Immediate && "Invalid type access!");
160 return Imm.Val;
161 }
162
163 StringRef getToken() const {
164 assert(Kind == Token && "Invalid type access!");
165 return Tok;
166 }
167
168 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
169 switch (Kind) {
170 case Immediate:
171 MAI.printExpr(OS, *getImm());
172 break;
173 case Register:
174 OS << "<register x";
175 OS << getReg() << ">";
176 break;
177 case Token:
178 OS << "'" << getToken() << "'";
179 break;
180 }
181 }
182
183 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
184 assert(Expr && "Expr shouldn't be null!");
185
186 if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
187 Inst.addOperand(MCOperand::createImm(CE->getValue()));
188 else
190 }
191
192 // Used by the TableGen Code
193 void addRegOperands(MCInst &Inst, unsigned N) const {
194 assert(N == 1 && "Invalid number of operands!");
196 }
197
198 void addImmOperands(MCInst &Inst, unsigned N) const {
199 assert(N == 1 && "Invalid number of operands!");
200 addExpr(Inst, getImm());
201 }
202
203 static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
204 auto Op = std::make_unique<BPFOperand>(Token);
205 Op->Tok = Str;
206 Op->StartLoc = S;
207 Op->EndLoc = S;
208 return Op;
209 }
210
211 static std::unique_ptr<BPFOperand> createReg(MCRegister Reg, SMLoc S,
212 SMLoc E) {
213 auto Op = std::make_unique<BPFOperand>(Register);
214 Op->Reg.RegNum = Reg;
215 Op->StartLoc = S;
216 Op->EndLoc = E;
217 return Op;
218 }
219
220 static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
221 SMLoc E) {
222 auto Op = std::make_unique<BPFOperand>(Immediate);
223 Op->Imm.Val = Val;
224 Op->StartLoc = S;
225 Op->EndLoc = E;
226 return Op;
227 }
228
229 // Identifiers that can be used at the start of a statment.
230 static bool isValidIdAtStart(StringRef Name) {
231 return StringSwitch<bool>(Name.lower())
232 .Case("if", true)
233 .Case("call", true)
234 .Case("callx", true)
235 .Case("goto", true)
236 .Case("gotol", true)
237 .Case("gotox", true)
238 .Case("may_goto", true)
239 .Case("*", true)
240 .Case("exit", true)
241 .Case("lock", true)
242 .Case("ld_pseudo", true)
243 .Case("store_release", true)
244 .Default(false);
245 }
246
247 // Identifiers that can be used in the middle of a statment.
248 static bool isValidIdInMiddle(StringRef Name) {
249 return StringSwitch<bool>(Name.lower())
250 .Case("u64", true)
251 .Case("u32", true)
252 .Case("u16", true)
253 .Case("u8", true)
254 .Case("s32", true)
255 .Case("s16", true)
256 .Case("s8", true)
257 .Case("be64", true)
258 .Case("be32", true)
259 .Case("be16", true)
260 .Case("le64", true)
261 .Case("le32", true)
262 .Case("le16", true)
263 .Case("bswap16", true)
264 .Case("bswap32", true)
265 .Case("bswap64", true)
266 .Case("goto", true)
267 .Case("ll", true)
268 .Case("skb", true)
269 .Case("s", true)
270 .Case("atomic_fetch_add", true)
271 .Case("atomic_fetch_and", true)
272 .Case("atomic_fetch_or", true)
273 .Case("atomic_fetch_xor", true)
274 .Case("xchg_64", true)
275 .Case("xchg32_32", true)
276 .Case("cmpxchg_64", true)
277 .Case("cmpxchg32_32", true)
278 .Case("addr_space_cast", true)
279 .Case("load_acquire", true)
280 .Default(false);
281 }
282};
283} // end anonymous namespace.
284
285#define GET_REGISTER_MATCHER
286#define GET_MATCHER_IMPLEMENTATION
287#include "BPFGenAsmMatcher.inc"
288
289bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
290
291 if (Operands.size() == 4) {
292 // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
293 // reg1 must be the same as reg2
294 BPFOperand &Op0 = (BPFOperand &)*Operands[0];
295 BPFOperand &Op1 = (BPFOperand &)*Operands[1];
296 BPFOperand &Op2 = (BPFOperand &)*Operands[2];
297 BPFOperand &Op3 = (BPFOperand &)*Operands[3];
298 if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
299 && Op1.getToken() == "="
300 && (Op2.getToken() == "-" || Op2.getToken() == "be16"
301 || Op2.getToken() == "be32" || Op2.getToken() == "be64"
302 || Op2.getToken() == "le16" || Op2.getToken() == "le32"
303 || Op2.getToken() == "le64")
304 && Op0.getReg() != Op3.getReg())
305 return true;
306 }
307
308 return false;
309}
310
311bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
313 MCStreamer &Out, uint64_t &ErrorInfo,
314 bool MatchingInlineAsm) {
315 MCInst Inst;
316 SMLoc ErrorLoc;
317
318 if (PreMatchCheck(Operands))
319 return Error(IDLoc, "additional inst constraint not met");
320
321 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
322 default:
323 break;
324 case Match_Success:
325 Inst.setLoc(IDLoc);
326 Out.emitInstruction(Inst, getSTI());
327 return false;
328 case Match_MissingFeature:
329 return Error(IDLoc, "instruction use requires an option to be enabled");
330 case Match_MnemonicFail:
331 return Error(IDLoc, "unrecognized instruction mnemonic");
332 case Match_InvalidOperand:
333 ErrorLoc = IDLoc;
334
335 if (ErrorInfo != ~0U) {
336 if (ErrorInfo >= Operands.size())
337 return Error(ErrorLoc, "too few operands for instruction");
338
339 ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
340
341 if (ErrorLoc == SMLoc())
342 ErrorLoc = IDLoc;
343 }
344
345 return Error(ErrorLoc, "invalid operand for instruction");
346 case Match_InvalidBrTarget:
347 return Error(Operands[ErrorInfo]->getStartLoc(),
348 "operand is not an identifier or 16-bit signed integer");
349 case Match_InvalidSImm16:
350 return Error(Operands[ErrorInfo]->getStartLoc(),
351 "operand is not a 16-bit signed integer");
352 case Match_InvalidTiedOperand:
353 return Error(Operands[ErrorInfo]->getStartLoc(),
354 "operand is not the same as the dst register");
355 }
356
357 llvm_unreachable("Unknown match type detected!");
358}
359
360bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
361 SMLoc &EndLoc) {
362 if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
363 return Error(StartLoc, "invalid register name");
364 return false;
365}
366
367ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
368 SMLoc &EndLoc) {
369 const AsmToken &Tok = getParser().getTok();
370 StartLoc = Tok.getLoc();
371 EndLoc = Tok.getEndLoc();
372 Reg = BPF::NoRegister;
373 StringRef Name = getLexer().getTok().getIdentifier();
374
375 if (!MatchRegisterName(Name)) {
376 getParser().Lex(); // Eat identifier token.
378 }
379
381}
382
383ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
384 SMLoc S = getLoc();
385
386 if (getLexer().getKind() == AsmToken::Identifier) {
387 StringRef Name = getLexer().getTok().getIdentifier();
388
389 if (BPFOperand::isValidIdInMiddle(Name)) {
390 getLexer().Lex();
391 Operands.push_back(BPFOperand::createToken(Name, S));
393 }
394
396 }
397
398 switch (getLexer().getKind()) {
399 case AsmToken::Minus:
400 case AsmToken::Plus: {
401 if (getLexer().peekTok().is(AsmToken::Integer))
403 [[fallthrough]];
404 }
405
406 case AsmToken::Equal:
408 case AsmToken::Less:
409 case AsmToken::Pipe:
410 case AsmToken::Star:
411 case AsmToken::LParen:
412 case AsmToken::RParen:
413 case AsmToken::LBrac:
414 case AsmToken::RBrac:
415 case AsmToken::Slash:
416 case AsmToken::Amp:
418 case AsmToken::Caret: {
419 StringRef Name = getLexer().getTok().getString();
420 getLexer().Lex();
421 Operands.push_back(BPFOperand::createToken(Name, S));
422
424 }
425
431 case AsmToken::LessLess: {
432 Operands.push_back(BPFOperand::createToken(
433 getLexer().getTok().getString().substr(0, 1), S));
434 Operands.push_back(BPFOperand::createToken(
435 getLexer().getTok().getString().substr(1, 1), S));
436 getLexer().Lex();
437
439 }
440
441 default:
442 break;
443 }
444
446}
447
448ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
449 SMLoc S = getLoc();
450 SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
451
452 switch (getLexer().getKind()) {
453 default:
456 StringRef Name = getLexer().getTok().getIdentifier();
457 MCRegister Reg = MatchRegisterName(Name);
458
459 if (!Reg)
461
462 getLexer().Lex();
463 Operands.push_back(BPFOperand::createReg(Reg, S, E));
464 }
466}
467
468ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
469 switch (getLexer().getKind()) {
470 default:
472 case AsmToken::LParen:
473 case AsmToken::Minus:
474 case AsmToken::Plus:
476 case AsmToken::String:
478 break;
479 }
480
481 const MCExpr *IdVal;
482 SMLoc S = getLoc();
483
484 if (getParser().parseExpression(IdVal))
486
487 SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
488 Operands.push_back(BPFOperand::createImm(IdVal, S, E));
489
491}
492
493/// Parse an BPF instruction which is in BPF verifier format.
494bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
495 SMLoc NameLoc, OperandVector &Operands) {
496 // The first operand could be either register or actually an operator.
497 MCRegister Reg = MatchRegisterName(Name);
498
499 if (Reg) {
500 SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
501 Operands.push_back(BPFOperand::createReg(Reg, NameLoc, E));
502 } else if (BPFOperand::isValidIdAtStart(Name))
503 Operands.push_back(BPFOperand::createToken(Name, NameLoc));
504 else
505 return Error(NameLoc, "invalid register/token name");
506
507 while (!getLexer().is(AsmToken::EndOfStatement)) {
508 // Attempt to parse token as operator
509 if (parseOperandAsOperator(Operands).isSuccess())
510 continue;
511
512 // Attempt to parse token as register
513 if (parseRegister(Operands).isSuccess())
514 continue;
515
516 if (getLexer().is(AsmToken::Comma)) {
517 getLexer().Lex();
518 continue;
519 }
520
521 // Attempt to parse token as an immediate
522 if (!parseImmediate(Operands).isSuccess()) {
523 SMLoc Loc = getLexer().getLoc();
524 return Error(Loc, "unexpected token");
525 }
526 }
527
528 if (getLexer().isNot(AsmToken::EndOfStatement)) {
529 SMLoc Loc = getLexer().getLoc();
530
531 getParser().eatToEndOfStatement();
532
533 return Error(Loc, "unexpected token");
534 }
535
536 // Consume the EndOfStatement.
537 getParser().Lex();
538 return false;
539}
540
static MCRegister MatchRegisterName(StringRef Name)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser()
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
static LVOptions Options
Definition LVOptions.cpp:25
mir Rename Register Operands
Register Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
static StringRef substr(StringRef Str, uint64_t Len)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes)
LLVM_ABI SMLoc getLoc() const
Definition AsmLexer.cpp:32
LLVM_ABI SMLoc getEndLoc() const
Definition AsmLexer.cpp:34
void printExpr(raw_ostream &, const MCExpr &) const
void setLoc(SMLoc loc)
Definition MCInst.h:207
void addOperand(const MCOperand Op)
Definition MCInst.h:215
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
const FeatureBitset & getFeatureBits() const
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Ternary parse status returned by various parse* methods.
static constexpr StatusTy Failure
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
This is an optimization pass for GlobalISel generic memory operations.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
Target & getTheBPFleTarget()
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
Target & getTheBPFbeTarget()
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Target & getTheBPFTarget()
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
DWARFExpression::Operation Op
#define N
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...