32#include "llvm/IR/IntrinsicsAArch64.h"
42#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
49 bool runOnModule(
Module &M)
override;
53 bool coalescePTrueIntrinsicCalls(
BasicBlock &BB,
67void SVEIntrinsicOpts::getAnalysisUsage(
AnalysisUsage &AU)
const {
72char SVEIntrinsicOpts::ID = 0;
73static const char *
name =
"SVE intrinsics optimizations";
79 return new SVEIntrinsicOpts();
105 if (ConvertToUses.
empty())
115 if (IntrUser && IntrUser->getIntrinsicID() ==
116 Intrinsic::aarch64_sve_convert_from_svbool) {
120 if (IntrUserVTy->getElementCount().getKnownMinValue() >
121 PTrueVTy->getElementCount().getKnownMinValue())
133bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
134 BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) {
135 if (PTrues.
size() <= 1)
139 auto *MostEncompassingPTrue =
143 return PTrue1VTy->getElementCount().getKnownMinValue() <
144 PTrue2VTy->getElementCount().getKnownMinValue();
149 PTrues.
remove(MostEncompassingPTrue);
159 Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());
161 auto *MostEncompassingPTrueVTy =
163 auto *ConvertToSVBool = Builder.CreateIntrinsic(
164 Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
165 {MostEncompassingPTrue});
167 bool ConvertFromCreated =
false;
168 for (
auto *PTrue : PTrues) {
173 if (MostEncompassingPTrueVTy != PTrueVTy) {
174 ConvertFromCreated =
true;
176 Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
177 auto *ConvertFromSVBool =
178 Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
179 {PTrueVTy}, {ConvertToSVBool});
180 PTrue->replaceAllUsesWith(ConvertFromSVBool);
182 PTrue->replaceAllUsesWith(MostEncompassingPTrue);
184 PTrue->eraseFromParent();
188 if (!ConvertFromCreated)
189 ConvertToSVBool->eraseFromParent();
242bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
243 SmallSetVector<Function *, 4> &Functions) {
246 for (
auto *
F : Functions) {
247 for (
auto &BB : *
F) {
248 SmallSetVector<IntrinsicInst *, 4> SVAllPTrues;
249 SmallSetVector<IntrinsicInst *, 4> SVPow2PTrues;
252 for (Instruction &
I : BB) {
257 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
260 const auto PTruePattern =
263 if (PTruePattern == AArch64SVEPredPattern::all)
264 SVAllPTrues.
insert(IntrI);
265 if (PTruePattern == AArch64SVEPredPattern::pow2)
266 SVPow2PTrues.
insert(IntrI);
269 Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);
270 Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);
279bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *
I) {
280 auto *
F =
I->getFunction();
281 auto Attr =
F->getFnAttribute(Attribute::VScaleRange);
285 unsigned MinVScale = Attr.getVScaleRangeMin();
286 std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
288 if (!MaxVScale || MinVScale != MaxVScale)
293 auto *FixedPredType =
298 if (!Store || !
Store->isSimple())
302 if (
Store->getOperand(0)->getType() != FixedPredType)
307 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract)
320 if (BitCast->getOperand(0)->getType() != PredType)
324 Builder.SetInsertPoint(
I);
326 Builder.CreateStore(BitCast->getOperand(0),
Store->getPointerOperand());
328 Store->eraseFromParent();
329 if (IntrI->use_empty())
330 IntrI->eraseFromParent();
331 if (BitCast->use_empty())
332 BitCast->eraseFromParent();
339bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *
I) {
340 auto *
F =
I->getFunction();
341 auto Attr =
F->getFnAttribute(Attribute::VScaleRange);
345 unsigned MinVScale = Attr.getVScaleRangeMin();
346 std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
348 if (!MaxVScale || MinVScale != MaxVScale)
353 auto *FixedPredType =
358 if (!BitCast || BitCast->getType() != PredType)
363 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert)
373 if (!Load || !
Load->isSimple())
377 if (
Load->getType() != FixedPredType)
381 Builder.SetInsertPoint(Load);
383 auto *LoadPred = Builder.CreateLoad(PredType,
Load->getPointerOperand());
385 BitCast->replaceAllUsesWith(LoadPred);
386 BitCast->eraseFromParent();
387 if (IntrI->use_empty())
388 IntrI->eraseFromParent();
389 if (
Load->use_empty())
390 Load->eraseFromParent();
395bool SVEIntrinsicOpts::optimizeInstructions(
396 SmallSetVector<Function *, 4> &Functions) {
399 for (
auto *
F : Functions) {
400 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
405 ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
406 for (
auto *BB : RPOT) {
408 switch (
I.getOpcode()) {
409 case Instruction::Store:
410 Changed |= optimizePredicateStore(&
I);
412 case Instruction::BitCast:
413 Changed |= optimizePredicateLoad(&
I);
423bool SVEIntrinsicOpts::optimizeFunctions(
424 SmallSetVector<Function *, 4> &Functions) {
427 Changed |= optimizePTrueIntrinsicCalls(Functions);
428 Changed |= optimizeInstructions(Functions);
433bool SVEIntrinsicOpts::runOnModule(
Module &M) {
435 SmallSetVector<Function *, 4> Functions;
440 for (
auto &
F :
M.getFunctionList()) {
441 if (!
F.isDeclaration())
444 switch (
F.getIntrinsicID()) {
445 case Intrinsic::vector_extract:
446 case Intrinsic::vector_insert:
447 case Intrinsic::aarch64_sve_ptrue:
448 for (User *U :
F.users())
456 if (!Functions.
empty())
457 Changed |= optimizeFunctions(Functions);
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool isPTruePromoted(IntrinsicInst *PTrue)
Checks if a ptrue intrinsic call is promoted.
This file implements a set that has insertion order iteration characteristics.
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
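This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.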
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Legacy analysis pass which computes a DominatorTree.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
bool remove(const value_type &X)
Remove an item from the set vector.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
size_type size() const
Determine the number of elements in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
ModulePass * createSVEIntrinsicOptsPass()
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.