#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;
static cl::opt<bool> WidenLoads(
    "amdgpu-late-codegenprepare-widen-constant-loads",
    cl::desc("Widen sub-dword constant address space loads in "
             "AMDGPULateCodeGenPrepare"),
    cl::ReallyHidden, cl::init(true));
namespace {

class AMDGPULateCodeGenPrepare
    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Function &F;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  AssumptionCache *const AC;
  const UniformityInfo &UA;

  SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
                           AssumptionCache *AC, const UniformityInfo &UA)
      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}

  bool run();

  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, DL, AC);
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};
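// Note on isDWORDAligned above: a pointer is DWORD aligned when its two low
// address bits are known zero, i.e. computeKnownBits reports at least two
// trailing zero bits. For example, a base pointer known to be align(4) stays
// DWORD aligned after adding any multiple-of-4 constant offset.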
using ValueToValueMap = DenseMap<const Value *, Value *>;

class LiveRegOptimizer {
private:
  Module &Mod;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  /// The scalar type to convert illegal vectors to.
  Type *const ConvertToScalar;
  /// Map of a def to its converted value.
  ValueToValueMap ValMap;
  /// Per-basic-block cache of conversions from the optimized type back to the
  /// original type.
  DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

public:
  /// Calculate the type to convert the problematic \p OriginalType to; the
  /// type may be widened in the process (e.g. v3i8 -> i32).
  Type *calculateConvertType(Type *OriginalType);
  /// Convert the virtual register defined by \p V to a value of the legal
  /// type computed by calculateConvertType.
  Value *convertToOptType(Instruction *V, BasicBlock::iterator InsertPt);
  /// Convert \p V back to the original type \p ConvertType, stripping away any
  /// MSBs left over from an imperfect fit (e.g. v2i32 -> v7i8).
  Value *convertFromOptType(Type *ConvertType, Instruction *V,
                            BasicBlock::iterator InsertPt,
                            BasicBlock *InsertBlock);
  /// Check for problematic PHI nodes or cross-block values based on the value
  /// defined by \p I, and coerce them to legal types if necessary.
  bool optimizeLiveType(Instruction *I,
                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  // Whether the type should be replaced to avoid inefficient legalization.
  bool shouldReplace(Type *ITy) {
    FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
    if (!VTy)
      return false;

    const auto *TLI = ST.getTargetLowering();

    Type *EltTy = VTy->getElementType();
    // Bit packing only works on integer elements no wider than the scalar we
    // pack into.
    if (!EltTy->isIntegerTy() ||
        EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
      return false;

    // Only coerce illegal element types.
    TargetLoweringBase::LegalizeKind LK =
        TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
    return LK.first != TargetLoweringBase::TypeLegal;
  }
  // An operation that consumes the vector directly, ending the search.
  bool isOpLegal(Instruction *I) { return isa<StoreInst, IntrinsicInst>(I); }

  bool isCoercionProfitable(Instruction *II) {
    SmallPtrSet<Instruction *, 4> CVisited;
    SmallVector<Instruction *, 4> UserList;

    // Check users for profitable conditions (an across-block user that can
    // natively consume the illegal vector).
    for (User *V : II->users())
      if (auto *UseInst = dyn_cast<Instruction>(V))
        UserList.push_back(UseInst);

    // Instructions the search may look through while walking the use chain.
    auto IsLookThru = [](Instruction *II) {
      if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
        return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
      return isa<PHINode, ShuffleVectorInst, InsertElementInst,
                 ExtractElementInst, CastInst>(II);
    };

    while (!UserList.empty()) {
      auto *CII = UserList.pop_back_val();
      if (!CVisited.insert(CII).second)
        continue;

      if (CII->getParent() == II->getParent() && !IsLookThru(II))
        continue;

      if (isOpLegal(CII))
        return true;

      if (IsLookThru(CII))
        for (User *V : CII->users())
          if (auto *UseInst = dyn_cast<Instruction>(V))
            UserList.push_back(UseInst);
    }
    return false;
  }

  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
};

} // end anonymous namespace
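// The profitability walk above, in short: starting from the def, follow users
// through "look-through" operations (phis, shuffles, insert/extract element,
// casts, and llvm.amdgcn.perm). Coercion is considered profitable only if the
// walk escapes the defining block and reaches a consumer that can take the
// value directly; otherwise the added bitcasts would be pure overhead.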
bool AMDGPULateCodeGenPrepare::run() {
  // Vectors of illegal types that cross basic blocks are scalarized and
  // widened during SelectionDAG building, with each element occupying its own
  // register. Pack such values into legally typed registers across block
  // boundaries instead, converting back before the uses.
  LiveRegOptimizer LRO(*F.getParent(), ST);

  bool Changed = false;

  bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

  for (auto &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB))) {
      Changed |= !HasScalarSubwordLoads && visit(I);
      Changed |= LRO.optimizeLiveType(&I, DeadInsts);
    }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return Changed;
}
Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
  assert(OriginalType->getScalarSizeInBits() <=
         ConvertToScalar->getScalarSizeInBits());

  FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
  unsigned ConvertEltCount =
      (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

  // Anything that fits within one convert-to scalar becomes a single iN.
  if (OriginalSize <= ConvertScalarSize)
    return IntegerType::get(Mod.getContext(), ConvertScalarSize);

  // Otherwise round the bit count up to a whole number of scalars.
  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                         ConvertEltCount, false);
}
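// Worked examples, assuming ConvertToScalar is i32 (as set in the
// LiveRegOptimizer constructor):
//   v3i8  : 24 bits <= 32            -> i32
//   v5i16 : 80 bits, ceil(80/32) = 3 -> v3i32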
Value *LiveRegOptimizer::convertToOptType(Instruction *V,
                                          BasicBlock::iterator InsertPt) {
  FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
  Type *NewTy = calculateConvertType(V->getType());

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

  IRBuilder<> Builder(V->getParent(), InsertPt);
  // If the bit counts match, the old vector fits the new type exactly.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

  // On a bit-count mismatch, pad the vector with extra (poison) elements
  // before casting.
  assert(NewSize > OriginalSize);
  uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();

  SmallVector<int, 8> ShuffleMask;
  uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
  for (unsigned I = 0; I < OriginalElementCount; I++)
    ShuffleMask.push_back(I);

  for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
    ShuffleMask.push_back(OriginalElementCount);

  Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
  return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
}
Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                            BasicBlock::iterator InsertPt,
                                            BasicBlock *InsertBB) {
  FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

  IRBuilder<> Builder(InsertBB, InsertPt);
  // If the bit counts match, a plain bitcast recovers the original type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

  // A mismatch means a wider value was used to hold the bits.
  assert(OriginalSize > NewSize);

  // For wide scalars, simply truncate and cast back.
  if (!V->getType()->isVectorTy()) {
    auto *Trunc =
        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize));
    return Builder.CreateBitCast(Trunc, NewVTy);
  }

  // For wider vectors, cast to a vector of the narrow element type, then
  // shuffle away the excess high elements.
  VectorType *ExpandedVT = VectorType::get(
      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
      (OriginalSize / NewVTy->getScalarSizeInBits()), false);
  Value *Converted = Builder.CreateBitCast(V, ExpandedVT);

  unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
  SmallVector<int, 8> ShuffleMask(NarrowElementCount);
  std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

  return Builder.CreateShuffleVector(Converted, ShuffleMask);
}
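// For the scalar path, recovering a v3i8 carried as an i32 looks roughly like
// (illustrative IR, value names invented):
//   %t   = trunc i32 %packed to i24
//   %out = bitcast i24 %t to <3 x i8>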
bool LiveRegOptimizer::optimizeLiveType(
    Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<PHINode *, 4> PhiNodes;
  SmallPtrSet<Instruction *, 4> Defs;
  SmallPtrSet<Instruction *, 4> Uses;
  SmallPtrSet<Instruction *, 4> Visited;

  Worklist.push_back(I);
  while (!Worklist.empty()) {
    Instruction *II = Worklist.pop_back_val();

    if (!Visited.insert(II).second)
      continue;

    if (!shouldReplace(II->getType()))
      continue;

    if (!isCoercionProfitable(II))
      continue;

    if (PHINode *Phi = dyn_cast<PHINode>(II)) {
      PhiNodes.insert(Phi);
      // Collect all the incoming values of problematic PHI nodes.
      for (Value *V : Phi->incoming_values()) {
        // Repeat the collection process for newly found PHI nodes.
        if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
          if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
            Worklist.push_back(OpPhi);
          continue;
        }

        Instruction *IncInst = dyn_cast<Instruction>(V);
        // Other incoming value types (e.g. vector literals) are unhandled.
        if (!IncInst && !isa<ConstantAggregateZero>(V))
          return false;

        // Collect all other incoming values for coercion.
        if (IncInst)
          Defs.insert(IncInst);
      }
    }

    // Collect all relevant uses.
    for (User *V : II->users()) {
      // Repeat the collection process for problematic PHI nodes.
      if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
        if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
          Worklist.push_back(OpPhi);
        continue;
      }

      Instruction *UseInst = cast<Instruction>(V);
      // Collect all uses of PHI nodes and any use that crosses a basic-block
      // boundary.
      if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
        Uses.insert(UseInst);
        if (!Defs.count(II) && !isa<PHINode>(II))
          Defs.insert(II);
      }
    }
  }

  // Coerce and track the defs.
  for (Instruction *D : Defs) {
    if (!ValMap.contains(D)) {
      BasicBlock::iterator InsertPt = std::next(D->getIterator());
      Value *ConvertVal = convertToOptType(D, InsertPt);
      assert(ConvertVal);
      ValMap[D] = ConvertVal;
    }
  }

  // Construct new-typed PHI nodes.
  for (PHINode *Phi : PhiNodes) {
    ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
                                  Phi->getNumIncomingValues(),
                                  Phi->getName() + ".tc", Phi->getIterator());
  }

  // Connect all the PHI nodes with their new incoming values.
  for (PHINode *Phi : PhiNodes) {
    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
    bool MissingIncVal = false;
    for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
      Value *IncVal = Phi->getIncomingValue(I);
      if (isa<ConstantAggregateZero>(IncVal)) {
        Type *NewType = calculateConvertType(Phi->getType());
        NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
                            Phi->getIncomingBlock(I));
      } else if (Value *Val = ValMap.lookup(IncVal)) {
        NewPhi->addIncoming(Val, Phi->getIncomingBlock(I));
      } else {
        MissingIncVal = true;
      }
    }
    if (MissingIncVal) {
      Value *DeadVal = ValMap[Phi];
      // The coercion chain of the PHI is broken. Delete the PHI from the
      // ValMap, along with any connected user PHIs.
      SmallVector<Value *, 4> PHIWorklist;
      SmallPtrSet<Value *, 4> VisitedPhis;
      PHIWorklist.push_back(DeadVal);
      while (!PHIWorklist.empty()) {
        Value *NextDeadValue = PHIWorklist.pop_back_val();
        VisitedPhis.insert(NextDeadValue);
        auto OriginalPhi =
            llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {
              return ValMap[CandPhi] == NextDeadValue;
            });
        // This PHI may already have been removed from the maps while unwinding
        // a previous PHI.
        if (OriginalPhi != PhiNodes.end())
          ValMap.erase(*OriginalPhi);

        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));

        for (User *U : NextDeadValue->users()) {
          if (!VisitedPhis.contains(cast<PHINode>(U)))
            PHIWorklist.push_back(U);
        }
      }
    } else {
      DeadInsts.emplace_back(cast<Instruction>(Phi));
    }
  }

  // Coerce back to the original type and replace the uses.
  for (Instruction *U : Uses) {
    // Replace all converted operands of the use.
    for (auto [OpIdx, Op] : enumerate(U->operands())) {
      if (ValMap.contains(Op) && ValMap[Op]) {
        Value *NewVal = nullptr;
        if (BBUseValMap.contains(U->getParent()) &&
            BBUseValMap[U->getParent()].contains(ValMap[Op])) {
          NewVal = BBUseValMap[U->getParent()][ValMap[Op]];
        } else {
          BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
          // If an originally typed definition of Op already exists in this
          // block, simply reuse it.
          if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
              U->getParent() == cast<Instruction>(Op)->getParent()) {
            NewVal = Op;
          } else {
            NewVal = convertFromOptType(Op->getType(),
                                        cast<Instruction>(ValMap[Op]),
                                        InsertPt, U->getParent());
            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
          }
        }
        assert(NewVal);
        U->setOperand(OpIdx, NewVal);
      }
    }
  }

  return true;
}
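// End-to-end sketch (illustrative IR, names invented): a <4 x i8> value live
// across blocks
//   bb0:  %def = ...                                   ; <4 x i8>
//   bb1:  %use = phi <4 x i8> [ %def, %bb0 ], ...
// becomes, after coercion,
//   bb0:  %def.bc  = bitcast <4 x i8> %def to i32
//   bb1:  %use.tc  = phi i32 [ %def.bc, %bb0 ], ...
//         %use.bc  = bitcast i32 %use.tc to <4 x i8>   ; feeds original uses
// so the cross-block value occupies one 32-bit register instead of being
// scalarized and widened per element during legalization.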
bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address spaces.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  Type *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL.getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // The load must be naturally aligned.
  if (LI.getAlign() < DL.getABITypeAlign(Ty))
    return false;
  // It must be uniform, i.e. a scalar load.
  return UA.isUniform(&LI);
}
bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip loads already aligned to at least a DWORD; SDAG handles those.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
  // If the base is not DWORD aligned, the transform below is not safe.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original alignment can simply be promoted.
    LI.setAlignment(Align(4));
    return true;
  }

  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
  auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

  unsigned ShAmt = Adjust * 8;
  Value *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),
                      DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy
                                                               : LI.getType()),
      LI.getType());
  LI.replaceAllUsesWith(NewVal);
  DeadInsts.emplace_back(&LI);

  return true;
}
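// Illustrative transform (value names invented): a 2-byte load at base+2,
// with the base DWORD aligned, becomes a DWORD load plus shift and truncate:
//   %v = load i16, ptr addrspace(4) %gep, align 2
// =>
//   %wide = load i32, ptr addrspace(4) %base.dword, align 4
//   %shr  = lshr i32 %wide, 16
//   %v    = trunc i32 %shr to i16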
PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
  static char ID;

  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.setPreservesCFG();
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
    const TargetMachine &TM = TPC.getTM<TargetMachine>();
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    UniformityInfo &UI =
        getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

    return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
  }
};

char AMDGPULateCodeGenPrepareLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
  return new AMDGPULateCodeGenPrepareLegacy();
}