78#define DEBUG_TYPE "interleaved-access"
81 "lower-interleaved-accesses",
82 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
87class InterleavedAccessImpl {
88 friend class InterleavedAccess;
91 InterleavedAccessImpl() =
default;
93 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
97 DominatorTree *DT =
nullptr;
98 const TargetLowering *TLI =
nullptr;
101 unsigned MaxFactor = 0
u;
104 bool lowerInterleavedLoad(Instruction *Load,
105 SmallSetVector<Instruction *, 32> &DeadInsts);
108 bool lowerInterleavedStore(Instruction *Store,
109 SmallSetVector<Instruction *, 32> &DeadInsts);
113 bool lowerDeinterleaveIntrinsic(IntrinsicInst *
II,
114 SmallSetVector<Instruction *, 32> &DeadInsts);
118 bool lowerInterleaveIntrinsic(IntrinsicInst *
II,
119 SmallSetVector<Instruction *, 32> &DeadInsts);
134 SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
139 InterleavedAccessImpl Impl;
144 InterleavedAccess() : FunctionPass(ID) {
148 StringRef getPassName()
const override {
return "Interleaved Access Pass"; }
152 void getAnalysisUsage(AnalysisUsage &AU)
const override {
163 auto *TLI = TM->getSubtargetImpl(
F)->getTargetLowering();
164 InterleavedAccessImpl Impl(DT, TLI);
165 bool Changed = Impl.runOnFunction(
F);
175char InterleavedAccess::ID = 0;
177bool InterleavedAccess::runOnFunction(
Function &
F) {
181 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
185 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
187 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
189 Impl.TLI = TM.getSubtargetImpl(
F)->getTargetLowering();
190 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
192 return Impl.runOnFunction(
F);
196 "Lower interleaved memory accesses to target specific intrinsics",
false,
200 "Lower interleaved memory accesses to target specific intrinsics",
false,
204 return new InterleavedAccess();
213 unsigned &Index,
unsigned MaxFactor,
214 unsigned NumLoadElements) {
219 for (Factor = 2; Factor <= MaxFactor; Factor++) {
221 if (Mask.size() * Factor > NumLoadElements)
242 unsigned MaxFactor) {
248 for (Factor = 2; Factor <= MaxFactor; Factor++) {
257 switch (
II->getIntrinsicID()) {
260 case Intrinsic::vp_load:
261 return II->getOperand(1);
262 case Intrinsic::masked_load:
263 return II->getOperand(2);
264 case Intrinsic::vp_store:
265 return II->getOperand(2);
266 case Intrinsic::masked_store:
267 return II->getOperand(3);
276static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
279static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
281 return getMask(WideMask, Factor, LeafValueTy->getElementCount());
284bool InterleavedAccessImpl::lowerInterleavedLoad(
294 if (LI && !LI->isSimple())
315 if (!BI->user_empty() &&
all_of(BI->users(), [](
auto *U) {
316 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
317 return SVI && isa<UndefValue>(SVI->getOperand(1));
319 for (
auto *SVI : BI->users())
331 if (Shuffles.
empty() && BinOpShuffles.
empty())
334 unsigned Factor,
Index;
336 unsigned NumLoadElements =
338 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
351 for (
auto *Shuffle : Shuffles) {
352 if (Shuffle->getType() != VecTy)
355 Shuffle->getShuffleMask(), Factor, Index))
358 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
361 for (
auto *Shuffle : BinOpShuffles) {
362 if (Shuffle->getType() != VecTy)
365 Shuffle->getShuffleMask(), Factor, Index))
368 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
378 if (!tryReplaceExtracts(Extracts, Shuffles))
381 bool BinOpShuffleChanged =
382 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
387 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *Load <<
"\n");
394 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.load or masked.load: "
397 <<
" and actual factor " << GapMask.popcount() <<
"\n");
403 Indices, Factor, GapMask))
405 return !Extracts.
empty() || BinOpShuffleChanged;
413bool InterleavedAccessImpl::replaceBinOpShuffles(
416 for (
auto *SVI : BinOpShuffles) {
421 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
427 Mask, SVI->getName(), insertPos);
430 SVI->getName(), insertPos);
433 SVI->replaceAllUsesWith(NewBI);
435 <<
"\n With : " << *NewSVI1 <<
"\n And : "
436 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
438 if (NewSVI1->getOperand(0) == Load)
440 if (NewSVI2->getOperand(0) == Load)
444 return !BinOpShuffles.empty();
447bool InterleavedAccessImpl::tryReplaceExtracts(
452 if (Extracts.
empty())
459 for (
auto *Extract : Extracts) {
462 auto Index = IndexOperand->getSExtValue();
467 for (
auto *Shuffle : Shuffles) {
470 if (!DT->dominates(Shuffle, Extract))
477 Shuffle->getShuffleMask(Indices);
478 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
479 if (Indices[
I] == Index) {
480 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
481 "Vector operations do not match");
482 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
487 if (ReplacementMap.
count(Extract))
493 if (!ReplacementMap.
count(Extract))
499 for (
auto &Replacement : ReplacementMap) {
500 auto *Extract = Replacement.first;
501 auto *
Vector = Replacement.second.first;
502 auto Index = Replacement.second.second;
503 Builder.SetInsertPoint(Extract);
504 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector, Index));
505 Extract->eraseFromParent();
511bool InterleavedAccessImpl::lowerInterleavedStore(
519 StoredValue =
SI->getValueOperand();
521 assert(
II->getIntrinsicID() == Intrinsic::vp_store ||
522 II->getIntrinsicID() == Intrinsic::masked_store);
523 StoredValue =
II->getArgOperand(0);
530 unsigned NumStoredElements =
536 assert(NumStoredElements % Factor == 0 &&
537 "number of stored element should be a multiple of Factor");
542 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *Store <<
"\n");
545 unsigned LaneMaskLen = NumStoredElements / Factor;
551 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.store or masked.store: "
554 <<
" and actual factor " << GapMask.popcount() <<
"\n");
559 if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
574 unsigned LeafMaskLen,
APInt &GapMask) {
576 for (
unsigned F = 0U;
F < Factor; ++
F) {
578 for (
unsigned Idx = 0U; Idx < LeafMaskLen; ++Idx) {
580 if (!
C->isZeroValue()) {
591static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
598 Value *RefArg =
nullptr;
601 for (
auto [Idx, Arg] :
enumerate(IMI->args())) {
603 GapMask.clearBit(Idx);
609 else if (RefArg != Arg)
610 return {
nullptr, GapMask};
616 return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
622 AndOp && AndOp->getOpcode() == Instruction::And) {
623 auto [MaskLHS, GapMaskLHS] =
624 getMask(AndOp->getOperand(0), Factor, LeafValueEC);
625 auto [MaskRHS, GapMaskRHS] =
626 getMask(AndOp->getOperand(1), Factor, LeafValueEC);
627 if (!MaskLHS || !MaskRHS)
628 return {
nullptr, GapMask};
631 return {
IRBuilder<>(AndOp).CreateAnd(MaskLHS, MaskRHS),
632 GapMaskLHS & GapMaskRHS};
636 if (
auto *
Splat = ConstMask->getSplatValue())
643 getGapMask(*ConstMask, Factor, LeafMaskLen, GapMask);
649 for (
unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
650 if (!GapMask[Idx % Factor])
652 Constant *
C = ConstMask->getAggregateElement(Idx);
653 if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] !=
C)
654 return {
nullptr, GapMask};
655 LeafMask[Idx / Factor] =
C;
663 Type *Op1Ty = SVI->getOperand(1)->getType();
665 return {
nullptr, GapMask};
670 unsigned NumSrcElts =
674 NumSrcElts * 2, StartIndexes) &&
675 llvm::all_of(StartIndexes, [](
unsigned Start) { return Start == 0; }) &&
676 llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](
int Idx) {
677 return Idx < (int)NumSrcElts;
682 return {Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
688 return {
nullptr, GapMask};
691bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
694 if (!LoadedVal || !LoadedVal->
hasOneUse())
703 assert(Factor &&
"unexpected deinterleave intrinsic");
710 LLVM_DEBUG(
dbgs() <<
"IA: Found a load with deinterleave intrinsic " << *DI
711 <<
" and factor = " << Factor <<
"\n");
714 if (
II->getIntrinsicID() != Intrinsic::masked_load &&
715 II->getIntrinsicID() != Intrinsic::vp_load)
719 APInt GapMask(Factor, 0);
720 std::tie(Mask, GapMask) =
727 if (GapMask.popcount() != Factor)
730 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.load or masked.load with deinterleave"
731 <<
" intrinsic " << *DI <<
" and factor = "
736 if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI))
741 DeadInsts.
insert(LoadedVal);
745bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
759 assert(Factor &&
"unexpected interleave intrinsic");
763 if (
II->getIntrinsicID() != Intrinsic::masked_store &&
764 II->getIntrinsicID() != Intrinsic::vp_store)
767 APInt GapMask(Factor, 0);
768 std::tie(Mask, GapMask) =
775 if (GapMask.popcount() != Factor)
778 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.store or masked.store with interleave"
779 <<
" intrinsic " << *IntII <<
" and factor = "
785 LLVM_DEBUG(
dbgs() <<
"IA: Found a store with interleave intrinsic "
786 << *IntII <<
" and factor = " << Factor <<
"\n");
790 if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues))
794 DeadInsts.
insert(StoredBy);
799bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
809 Changed |= lowerInterleavedLoad(&
I, DeadInsts);
814 Changed |= lowerInterleavedStore(&
I, DeadInsts);
818 Changed |= lowerDeinterleaveIntrinsic(
II, DeadInsts);
820 Changed |= lowerInterleaveIntrinsic(
II, DeadInsts);
824 for (
auto *
I : DeadInsts)
825 I->eraseFromParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static void getGapMask(const Constant &MaskConst, unsigned Factor, unsigned LeafMaskLen, APInt &GapMask)
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
static Value * getMaskOperand(IntrinsicInst *II)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
unsigned getBitWidth() const
Return the number of bits in the APInt.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Represents analyses that only rely on functions' control flow.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getFixed(ScalarTy MinVal)
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
void insert_range(Range &&R)
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
LLVM_ABI bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void initializeInterleavedAccessPass(PassRegistry &)
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.