// The pass transforms load/store <256 x i32> to AMX load/store intrinsics.

#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "lower-amx-type"

// Helper predicates that recognize x86_amx values and types:
  if (Ty->isX86_AMXTy())
  // ...
  for (unsigned i = 0; i < Ty->getNumContainedTypes(); i++) {
  // ...
  if (V->getType()->isX86_AMXTy())
  // ...
  if (I.getType()->isX86_AMXTy())

// createAllocaInstAtEntry: reserve a stack slot of type Ty in the entry block.
  unsigned AllocaAS = DL.getAllocaAddrSpace();
  AllocaInst *AllocaRes =
      new AllocaInst(Ty, AllocaAS, "", F.getEntryBlock().begin());
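// Illustrative sketch (not part of the original file; helper and type names
// are assumed): how the entry-block alloca helper above might be invoked to
// obtain a 1024-byte tile spill slot.
//
//   IRBuilder<> Builder(F.getContext());
//   Type *V256I32Ty = FixedVectorType::get(Builder.getInt32Ty(), 256);
//   AllocaInst *Slot =
//       createAllocaInstAtEntry(Builder, &F.getEntryBlock(), V256I32Ty);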
  std::map<Value *, Value *> Col2Row, Row2Col;

  // Derive the row count of a transposed tile from a column operand; the
  // result is cached in Col2Row.
  Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity) {
    if (auto It = Col2Row.find(V); It != Col2Row.end())
      return It->second;
    Value *RealRow = nullptr;
    // ...
    RealRow = Builder.CreateUDiv(V, Builder.getInt16(4));
    // ...
    Col2Row[V] = RealRow;
    return RealRow;
  }

  // Derive the column count from a row operand; the result is cached in
  // Row2Col.
  Value *getColFromRow(Instruction *II, Value *V, unsigned Granularity) {
    if (auto It = Row2Col.find(V); It != Row2Col.end())
      return It->second;
    Value *RealCol = nullptr;
    // ...
    RealCol = Builder.CreateNUWMul(V, Builder.getInt16(Granularity));
    // ...
    Row2Col[V] = RealCol;
    return RealCol;
  }
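  // Worked example (assuming 4-byte elements, e.g. i32/f32): a tile of
  // 16 rows x 64 byte-columns is consumed transposed with
  //   Row = 64 / 4 = 16   (CreateUDiv(V, getInt16(4)) above)
  //   Col = 16 * 4 = 64   (CreateNUWMul(V, getInt16(Granularity)) above)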
  // ShapeCalculator::getShape(II, OpNo): compute the (row, col) shape of the
  // tile operand number OpNo of the AMX intrinsic II.
  std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
    Value *Row = nullptr, *Col = nullptr;
    switch (II->getIntrinsicID()) {
    case Intrinsic::x86_t2rpntlvwz0_internal:
    case Intrinsic::x86_t2rpntlvwz0t1_internal:
    case Intrinsic::x86_t2rpntlvwz1_internal:
    case Intrinsic::x86_t2rpntlvwz1t1_internal:
    case Intrinsic::x86_tileloadd64_internal:
    case Intrinsic::x86_tileloaddt164_internal:
    case Intrinsic::x86_tilestored64_internal:
    case Intrinsic::x86_t2rpntlvwz0rs_internal:
    case Intrinsic::x86_t2rpntlvwz0rst1_internal:
    case Intrinsic::x86_t2rpntlvwz1rs_internal:
    case Intrinsic::x86_t2rpntlvwz1rst1_internal:
    case Intrinsic::x86_tileloaddrs64_internal:
    case Intrinsic::x86_tileloaddrst164_internal: {
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      break;
    }
    case Intrinsic::x86_tcmmimfp16ps_internal:
    case Intrinsic::x86_tcmmrlfp16ps_internal:
    case Intrinsic::x86_tdpbssd_internal:
    case Intrinsic::x86_tdpbsud_internal:
    case Intrinsic::x86_tdpbusd_internal:
    case Intrinsic::x86_tdpbuud_internal:
    case Intrinsic::x86_tdpbf16ps_internal:
    case Intrinsic::x86_tdpfp16ps_internal:
    case Intrinsic::x86_tmmultf32ps_internal:
    case Intrinsic::x86_tdpbf8ps_internal:
    case Intrinsic::x86_tdpbhf8ps_internal:
    case Intrinsic::x86_tdphbf8ps_internal:
    case Intrinsic::x86_tdphf8ps_internal: {
      // The shape depends on which operand of the multiply-accumulate the
      // tile is (accumulator, first source, or second source):
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      // ...
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(2);
      // ...
      Col = II->getArgOperand(1);
      break;
    }
    case Intrinsic::x86_ttransposed_internal:
    case Intrinsic::x86_tconjtfp16_internal: {
      assert((OpNo == 2) && "Illegal Operand Number.");
      // ...
      break;
    }
    case Intrinsic::x86_tcvtrowd2ps_internal:
    case Intrinsic::x86_tcvtrowps2bf16h_internal:
    case Intrinsic::x86_tcvtrowps2bf16l_internal:
    case Intrinsic::x86_tcvtrowps2phh_internal:
    case Intrinsic::x86_tcvtrowps2phl_internal:
    case Intrinsic::x86_tilemovrow_internal: {
      assert(OpNo == 2 && "Illegal Operand Number.");
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      break;
    }
    case Intrinsic::x86_ttdpbf16ps_internal:
    case Intrinsic::x86_ttdpfp16ps_internal:
    case Intrinsic::x86_ttcmmimfp16ps_internal:
    case Intrinsic::x86_ttcmmrlfp16ps_internal:
    case Intrinsic::x86_tconjtcmmimfp16ps_internal:
    case Intrinsic::x86_ttmmultf32ps_internal: {
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      // ...
      Col = II->getArgOperand(1);
      break;
    }
    }

    return std::make_pair(Row, Col);
  }
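  // Example of the mapping above (a sketch; the *_internal convention puts
  // the i16 shape operands first, so for tdpbssd the arguments are
  // (m, n, k, dst, a, b)):
  //   %d = call x86_amx @llvm.x86.tdpbssd.internal(i16 %m, i16 %n, i16 %k,
  //                                                x86_amx %dst, x86_amx %a,
  //                                                x86_amx %b)
  //   getShape(II, 3) -> (%m, %n)                    ; accumulator %dst
  //   getShape(II, 4) -> (%m, %k)                    ; first source %a
  //   getShape(II, 5) -> (rows derived from %k, %n)  ; second source %b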
  // Overload of getShape for PHI nodes: chase the first-user chain until an
  // AMX intrinsic with a known shape is found, otherwise give up.
  Use &U = *(Phi->use_begin());
  User *V = U.getUser();
  // ...
    Use &U = *(V->use_begin());
    OpNo = U.getOperandNo();
  // ...
    Use &U = *(V->use_begin());
  // ...
  return std::make_pair(nullptr, nullptr);
class X86LowerAMXType {
  // ...
  std::map<Value *, Value *> Col2Row, Row2Col;
  // ...
  void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast);
  void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST);
  bool transformBitcast(BitCastInst *Bitcast);
  // ...
};

// Fold a <256 x i32> load feeding a bitcast-to-x86_amx into a single
// tileloadd64 intrinsic.
void X86LowerAMXType::combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast) {
  Value *Row = nullptr, *Col = nullptr;
  // ...
  unsigned OpNo = U.getOperandNo();
  // ...
  // Use the maximum column width (64 bytes) as the stride.
  Value *Stride = Builder.getInt64(64);
  Value *I8Ptr = LD->getOperand(0);
  std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};

  Value *NewInst =
      Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, Args);
  Bitcast->replaceAllUsesWith(NewInst);
}
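// Sketch of the rewrite performed by combineLoadBitcast (register names are
// illustrative):
//   %src = load <256 x i32>, ptr %addr, align 64
//   %2   = bitcast <256 x i32> %src to x86_amx
// -->
//   %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
//                                                    ptr %addr, i64 64)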
void X86LowerAMXType::combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST) {
  // ...
  Value *Row = II->getOperand(0);
  Value *Col = II->getOperand(1);
  // ...
  Value *Stride = Builder.getInt64(64);
  Value *I8Ptr = ST->getOperand(1);
  std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, Args);
  // ...
  Value *Vec = Builder.CreateLoad(Bitcast->getType(), ST->getOperand(1));
  Bitcast->replaceAllUsesWith(Vec);
}
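// Sketch of the rewrite performed by combineBitcastStore (illustrative names):
//   %tile = call x86_amx @llvm.x86.tileloadd64.internal(...)
//   %vec  = bitcast x86_amx %tile to <256 x i32>
//   store <256 x i32> %vec, ptr %addr, align 64
// -->
//   call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr %addr,
//                                             i64 64, x86_amx %tile)
// If the bitcast still has other vector users, the value is re-read from
// %addr as <256 x i32>, which is what the trailing CreateLoad above provides.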
bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
  AllocaInst *AllocaAddr;
  Value *I8Ptr, *Stride;
  auto *Src = Bitcast->getOperand(0);

  auto Prepare = [&](Type *MemTy) {
    // ...
    Stride = Builder.getInt64(64);
  };

  if (Bitcast->getType()->isX86_AMXTy()) {
    // bitcast <256 x i32> to x86_amx: bounce the vector through a stack slot.
    // ...
    unsigned OpNo = U.getOperandNo();
    // ...
    Prepare(Bitcast->getOperand(0)->getType());
    Builder.CreateStore(Src, AllocaAddr);
    // ...
    Value *Row = nullptr, *Col = nullptr;
    // ...
    std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
    Value *NewInst =
        Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, Args);
    Bitcast->replaceAllUsesWith(NewInst);
  } else {
    // bitcast x86_amx to <256 x i32>: store the tile, then reload it as a
    // plain vector.
    // ...
    Value *Row = II->getOperand(0);
    Value *Col = II->getOperand(1);
    std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Src};
    Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, Args);
    Value *NewInst = Builder.CreateLoad(Bitcast->getType(), AllocaAddr);
    Bitcast->replaceAllUsesWith(NewInst);
  }
  // ...
}
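// Sketch of the vector-to-tile fallback above (illustrative names): when the
// bitcast cannot be folded into a neighbouring load/store, the value goes
// through a 64-byte-aligned stack slot.
//   %2 = bitcast <256 x i32> %src to x86_amx
// -->
//   %addr = alloca <256 x i32>, align 64
//   store <256 x i32> %src, ptr %addr, align 64
//   %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
//                                                    ptr %addr, i64 64)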
bool X86LowerAMXType::visit() {
  SmallVector<Instruction *, 8> DeadInsts;
  // ...
      if (Bitcast->getType()->isX86_AMXTy()) {
        // ...
        if (transformBitcast(Bitcast))
          // ...
        combineLoadBitcast(LD, Bitcast);
        // ...
      } else if (Src->getType()->isX86_AMXTy()) {
        // ...
        StoreInst *ST = nullptr;
        for (Use &U : Bitcast->uses()) {
          // ...
        }
        // ...
        if (transformBitcast(Bitcast))
          // ...
        combineBitcastStore(Bitcast, ST);
        // ...
      }
  // ...
  bool C = !DeadInsts.empty();
  for (auto *Inst : DeadInsts)
    Inst->eraseFromParent();
  // ...
}
// getAllocaPos: create a <256 x i32> stack slot in the entry block and return
// it as an i8 pointer.
  unsigned AllocaAS = DL.getAllocaAddrSpace();
  AllocaInst *AllocaRes =
      new AllocaInst(V256I32Ty, AllocaAS, "", F->getEntryBlock().begin());
  // ...
  Builder.SetInsertPoint(&*Iter);
  Value *I8Ptr = Builder.CreateBitCast(AllocaRes, Builder.getPtrTy());

// createTileStore: spill the tile defined by TileDef to Ptr with tilestored64.
  assert(Extr->hasIndices() && "Tile extract miss index!");
  Idx = Extr->getIndices()[0];
  // ...
  assert(II && "Not tile intrinsic!");
  Value *Row = II->getOperand(Idx);
  Value *Col = II->getOperand(Idx + 1);
  // ...
  Value *Stride = Builder.getInt64(64);
  std::array<Value *, 5> Args = {Row, Col, Ptr, Stride, TileDef};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, Args);

// replaceWithTileLoad: reload the tile from Ptr right at the use U.
  assert(V->getType()->isX86_AMXTy() && "Not define tile!");
  // ...
  assert(Extr->hasIndices() && "Tile extract miss index!");
  Idx = Extr->getIndices()[0];
  // ...
  Value *Row = II->getOperand(Idx);
  Value *Col = II->getOperand(Idx + 1);
  // ...
  Value *Stride = Builder.getInt64(64);
  std::array<Value *, 4> Args = {Row, Col, Ptr, Stride};
  Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, Args);

// isIncomingOfPHI: is any user of I a PHI node?
  for (Use &U : I->uses()) {
    User *V = U.getUser();
    // ...
  }
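// Sketch of how the two helpers above cooperate for "volatile" tile data
// (illustrative IR): a tile defined in one block and used elsewhere is stored
// right after its definition and re-loaded immediately before each use, so no
// x86_amx value stays live across the boundary.
//   %t = call x86_amx @llvm.x86.tdpbssd.internal(...)                 ; def
//   call void @llvm.x86.tilestored64.internal(i16 %r, i16 %c, ptr %p,
//                                             i64 64, x86_amx %t)
//   ...
//   %t.reload = call x86_amx @llvm.x86.tileloadd64.internal(i16 %r, i16 %c,
//                                                           ptr %p, i64 64)
//   use(%t.reload)                                                    ; use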
class X86VolatileTileData {
  Function &F;

public:
  X86VolatileTileData(Function &Func) : F(Func) {}
  Value *updatePhiIncomings(BasicBlock *BB,
                            SmallVector<Instruction *, 2> &Incomings);
  void replacePhiDefWithLoad(Instruction *PHI, Value *StorePtr);
  bool volatileTileData();
  void volatileTilePHI(PHINode *PHI);
  void volatileTileNonPHI(Instruction *I);
};

Value *X86VolatileTileData::updatePhiIncomings(
    BasicBlock *BB, SmallVector<Instruction *, 2> &Incomings) {
  // ...
  for (auto *I : Incomings) {
    // ...
    for (Use &U : I->uses()) {
      // ...
    }
  }
  // ...
}

void X86VolatileTileData::replacePhiDefWithLoad(Instruction *PHI,
                                                Value *StorePtr) {
  // ...
  for (Use &U : PHI->uses())
    // ...
  PHI->eraseFromParent();
}

void X86VolatileTileData::volatileTilePHI(PHINode *PHI) {
  // ...
  SmallVector<Instruction *, 2> Incomings;
  for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
    // ...
    assert(Inst && "We shouldn't fold AMX instruction!");
    // ...
  }
  Value *StorePtr = updatePhiIncomings(BB, Incomings);
  replacePhiDefWithLoad(PHI, StorePtr);
}

void X86VolatileTileData::volatileTileNonPHI(Instruction *I) {
  // ...
  for (Use &U : I->uses()) {
    // ...
  }
}

bool X86VolatileTileData::volatileTileData() {
  // ...
  for (BasicBlock &BB : F) {
    SmallVector<Instruction *, 2> PHIInsts;
    SmallVector<Instruction *, 8> AMXDefInsts;
    // ...
    for (Instruction &I : BB) {
      if (!I.getType()->isX86_AMXTy())
        // ...
    }
    for (Instruction *I : AMXDefInsts) {
      // ...
      volatileTileNonPHI(I);
      // ...
    }
    for (Instruction *I : PHIInsts) {
      // ...
    }
  }
  // ...
}
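// Sketch of the PHI rewrite done by volatileTilePHI/replacePhiDefWithLoad
// (illustrative): a PHI of x86_amx values becomes stores on each incoming
// edge plus a single load at the former PHI position.
//   bb.merge:
//     %t = phi x86_amx [ %a, %bb.left ], [ %b, %bb.right ]
// -->
//   bb.left:
//     call void @llvm.x86.tilestored64.internal(..., ptr %slot, ..., %a)
//   bb.right:
//     call void @llvm.x86.tilestored64.internal(..., ptr %slot, ..., %b)
//   bb.merge:
//     %t = call x86_amx @llvm.x86.tileloadd64.internal(..., ptr %slot, ...)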
class X86LowerAMXCast {
  Function &Func;
  ShapeCalculator *SC;
  std::unique_ptr<DominatorTree> DT;

public:
  X86LowerAMXCast(Function &F, ShapeCalculator *ShapeC)
      : Func(F), SC(ShapeC), DT(nullptr) {}
  bool combineCastStore(IntrinsicInst *Cast, StoreInst *ST);
  bool combineLoadCast(IntrinsicInst *Cast, LoadInst *LD);
  bool combineLdSt(SmallVectorImpl<Instruction *> &Casts);
  bool combineAMXcast(TargetLibraryInfo *TLI);
  bool transformAMXCast(IntrinsicInst *AMXCast);
  bool transformAllAMXCast();
  bool optimizeAMXCastFromPhi(IntrinsicInst *CI, PHINode *PN,
                              SmallSetVector<Instruction *, 16> &DeadInst);
};

// DCEInstruction: delete I if it is trivially dead and queue any operands
// that become dead as a result.
static bool DCEInstruction(Instruction *I,
                           SmallSetVector<Instruction *, 16> &WorkList,
                           const TargetLibraryInfo *TLI) {
  // ...
  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    Value *OpV = I->getOperand(i);
    I->setOperand(i, nullptr);
    // ...
  }
  // ...
  I->eraseFromParent();
  // ...
}
bool X86LowerAMXCast::optimizeAMXCastFromPhi(
    IntrinsicInst *CI, PHINode *PN,
    SmallSetVector<Instruction *, 16> &DeadInst) {
  // ...
  Type *SrcTy = Src->getType();
  // ...
  SmallSetVector<PHINode *, 4> OldPhiNodes;
  // ...
  while (!PhiWorklist.empty()) {
    // ...
    for (unsigned I = 0; I < OldPN->getNumOperands(); ++I) {
      Value *IncValue = OldPN->getIncomingValue(I);
      // ...
      // An undef/zero incoming value is materialized as a zeroed tile.
      Value *Row = nullptr, *Col = nullptr;
      std::tie(Row, Col) = SC->getShape(OldPN);
      // ...
      auto *Block = OldPN->getIncomingBlock(I);
      // ...
      Value *NewInst = Builder.CreateIntrinsic(
          Intrinsic::x86_tilezero_internal, {}, {Row, Col});
      NewInst = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
                                        {IncValue->getType()}, {NewInst});
      // ...
      OldPN->setIncomingValue(I, NewInst);
    }
    // ...
    if (OldPhiNodes.insert(PNode))
      // ...
  }
  // ...
  if (TyA != DestTy || TyB != SrcTy)
    // ...

  for (auto *OldPN : OldPhiNodes) {
    for (User *V : OldPN->users()) {
      // ...
      if (TyA != DestTy || TyB != SrcTy)
        // ...
      if (OldPhiNodes.count(PHI) == 0)
        // ...
    }
  }

  // Create new PHI nodes of the destination (tile) type.
  SmallDenseMap<PHINode *, PHINode *> NewPNodes;
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the incoming values of the new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      // ...
      NewV = NewPNodes[PrevPN];
      // ...
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // ...
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    // ...
    assert(TyA == DestTy && TyB == SrcTy);
    // ...
  }
  // ...
}
static Value *getShapeFromAMXIntrinsic(Value *Inst, unsigned ShapeIdx,
                                       bool IsRow) {
  // ...
  if (IsRow)
    return II->getOperand(0);
  assert(ShapeIdx < 2 && "Currently 2 shapes in 1 instruction at most!");
  return II->getOperand(ShapeIdx + 1);
}
bool X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
  // ...
  assert(Tile->getType()->isX86_AMXTy() && "Not Tile Operand!");
  // ...
  if (!Tile->hasOneUse())
    return false;
  // ...
  Value *Row = nullptr;
  Value *Col = nullptr;
  // ...
  Row = II->getOperand(0);
  Col = II->getOperand(1);
  // ...
  // The tile is one element of a multi-tile definition: fetch its shape from
  // the defining intrinsic via the extractvalue index.
  assert(II && "Unhandled source when fetching tile value!");
  unsigned ShapeIdx = II->getIndices()[0];
  Value *Tiles = II->getOperand(0);
  Row = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, true);
  Col = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, false);
  // ...
  assert(Row && Col && "Failed to get shape!");

  Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
  Value *I8Ptr = Builder.CreateBitCast(ST->getOperand(1), Builder.getPtrTy());
  std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, Args);
  // ...
}
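// Sketch of the rewrite performed by combineCastStore (illustrative names):
//   %t = call x86_amx @llvm.x86.tileloadd64.internal(...)
//   %v = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t)
//   store <256 x i32> %v, ptr %p, align 64
// -->
//   call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr %p,
//                                             i64 %stride, x86_amx %t)
// Unlike the bitcast path, the stride here is sign-extended from %col rather
// than hard-coded to 64.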
bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
  bool EraseLoad = true;
  Value *Row = nullptr, *Col = nullptr;
  // ...
  unsigned OpNo = U.getOperandNo();
  // ...
  Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
  // The dominator tree is built lazily, only once it is actually needed.
  if (!DT)
    DT.reset(new DominatorTree(Func));
  if (!DT->dominates(Row, LD) || !DT->dominates(Col, LD)) {
    // The shape values do not dominate the load: bounce the loaded vector
    // through a fresh stack slot so the tile load can be emitted at the cast.
    // ...
    Builder.SetInsertPoint(&*std::next(LD->getIterator()));
    Builder.CreateStore(LD, AllocaAddr);
    // ...
    Builder.SetInsertPoint(Cast);
    I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
    // ...
  } else {
    I8Ptr = Builder.CreateBitCast(LD->getOperand(0), Builder.getPtrTy());
  }
  std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
  // ...
  Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, Args);
  // ...
}
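// Sketch of the rewrite performed by combineLoadCast (illustrative names):
//   %v = load <256 x i32>, ptr %p, align 64
//   %t = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %v)
// -->
//   %t = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
//                                                    ptr %p, i64 %stride)
// When %row/%col do not dominate the load, the vector is first stored to a
// fresh entry-block alloca and the tile load reads from that slot instead.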
bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
  bool Change = false;
  for (auto *Cast : Casts) {
    // ...
    if (II->getIntrinsicID() == Intrinsic::x86_cast_tile_to_vector) {
      // Tile-to-vector cast: fold stores of the cast result into tilestored64
      // and drop the now-dead vector stores.
      // ...
      SmallVector<Instruction *, 2> DeadStores;
      for (User *U : Cast->users()) {
        // ...
      }
      // ...
      for (auto *Store : DeadStores)
        Store->eraseFromParent();
    } else {
      // Vector-to-tile cast fed by a plain load.
      // ...
      if (!Load || !Load->hasOneUse())
        // ...
      // ...
      Load->eraseFromParent();
    }
  }
  // ...
}
bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
  bool Change = false;
  // Collect the tile cast instructions.
  SmallVector<Instruction *, 8> Vec2TileInsts;
  SmallVector<Instruction *, 8> Tile2VecInsts;
  SmallVector<Instruction *, 8> PhiCastWorkList;
  SmallSetVector<Instruction *, 16> DeadInst;
  for (BasicBlock &BB : Func) {
    for (Instruction &I : BB) {
      // ...
    }
  }

  // Cancel adjacent casts: a cast straight back to the original type is
  // replaced by the inner cast's own source operand.
  auto Convert = [&](SmallVectorImpl<Instruction *> &Insts, Intrinsic::ID IID) {
    for (auto *Inst : Insts) {
      for (User *U : Inst->users()) {
        // ...
        if (!II || II->getIntrinsicID() != IID)
          continue;
        // ...
        II->replaceAllUsesWith(Inst->getOperand(0));
        // ...
      }
    }
  };

  Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
  Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);
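  // Sketch of the cancellation performed by Convert (illustrative names):
  //   %t  = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %v)
  //   %v2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t)
  //   use(%v2)
  // -->
  //   use(%v)
  // The round trip through x86_amx disappears; the dead casts are collected
  // and erased afterwards by EraseInst below.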
  SmallVector<Instruction *, 8> LiveCasts;
  auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
    for (auto *Inst : Insts) {
      // ...
      Inst->eraseFromParent();
      // ...
    }
  };

  EraseInst(Vec2TileInsts);
  EraseInst(Tile2VecInsts);
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combine "
                       "Vec2Tile and Tile2Vec:\n";
             Func.dump());
  Change |= combineLdSt(LiveCasts);
  EraseInst(LiveCasts);
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combine "
                       "AMXCast and load/store:\n";
             Func.dump());

  // Handle casts whose value round-trips through a PHI node.
  for (BasicBlock &BB : Func) {
    for (Instruction &I : BB) {
      // ...
    }
  }
  for (auto *I : PhiCastWorkList) {
    // ...
  }

  // Delete anything left dead by the PHI rewrite.
  while (!DeadInst.empty()) {
    // ...
  }
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after "
                       "optimizeAMXCastFromPhi:\n";
             Func.dump());
  // ...
}
bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
  AllocaInst *AllocaAddr;
  Value *I8Ptr, *Stride;

  auto Prepare = [&](Type *MemTy) {
    // ...
    I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
    Stride = Builder.getInt64(64);
  };

  if (AMXCast->getType()->isX86_AMXTy()) {
    // Vector-to-tile cast that survived combining: bounce through a slot.
    // ...
    unsigned OpNo = U.getOperandNo();
    // ...
    Builder.CreateStore(Src, AllocaAddr);
    // ...
    Value *Row = nullptr, *Col = nullptr;
    // ...
    std::array<Value *, 4> Args = {
        Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty())};
    Value *NewInst =
        Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, Args);
    // ...
  } else {
    // Tile-to-vector cast: store the tile, then reload it as a vector.
    // ...
    Value *Row = II->getOperand(0);
    Value *Col = II->getOperand(1);
    std::array<Value *, 5> Args = {
        Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty()), Src};
    Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, Args);
    Value *NewInst = Builder.CreateLoad(AMXCast->getType(), AllocaAddr);
    // ...
  }
  // ...
}
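// Sketch of the fallback for a cast that survived all combining (illustrative
// names):
//   %t = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %v)
// -->
//   %slot = alloca <256 x i32>, align 64            ; in the entry block
//   store <256 x i32> %v, ptr %slot, align 64
//   %t = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
//                                                    ptr %slot, i64 %stride)
// The tile-to-vector direction is symmetric: tilestored64 into the slot, then
// an ordinary <256 x i32> load.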
bool X86LowerAMXCast::transformAllAMXCast() {
  bool Change = false;
  // ...
  SmallVector<Instruction *, 8> WorkLists;
  for (BasicBlock &BB : Func) {
    for (Instruction &I : BB) {
      // ...
    }
  }
  // ...
  for (auto *Inst : WorkLists) {
    // ...
  }
  // ...
}
class X86LowerAMXTypeLegacyPass : public FunctionPass {
public:
  static char ID;

  X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    bool C = false;
    TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
    TargetLibraryInfo *TLI =
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

    ShapeCalculator SC(TM);
    X86LowerAMXCast LAC(F, &SC);
    C |= LAC.combineAMXcast(TLI);
    // ...
    C |= LAC.transformAllAMXCast();

    X86LowerAMXType LAT(F, &SC);
    // ...
    // Prepare for fast register allocation at -O0: outside of OptimizeNone
    // functions, make the tile data "volatile" (stored after each def and
    // reloaded before each use).
    if (TM->getOptLevel() == CodeGenOptLevel::None) {
      // ...
      if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
        X86VolatileTileData VTD(F);
        C = VTD.volatileTileData() || C;
      }
    }
    // ...
    return C;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // ...
  }
};

static const char PassName[] = "Lower AMX type for load/store";
char X86LowerAMXTypeLegacyPass::ID = 0;

// ...

FunctionPass *llvm::createX86LowerAMXTypePass() {
  return new X86LowerAMXTypeLegacyPass();
}
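// Usage sketch (assumed, mirroring how legacy X86 IR passes are typically
// registered): the factory above is what the target's IR pass pipeline calls,
// e.g.
//   // in the X86 target's IR pass setup
//   addPass(createX86LowerAMXTypePass());
// so the lowering runs on IR before instruction selection.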