#include <type_traits>

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
// Equivalence class key: only accesses agreeing on all four components can
// ever be merged into one vector access. (The 4-element shape is asserted in
// mergeEquivalenceClasses below.)
using EqClassKey =
    std::tuple<const Value * /* result of getUnderlyingObject() */,
               unsigned /* AddrSpace */,
               unsigned /* element size in bits */,
               char /* IsLoad; char because bool can't be a DenseMap key */>;

[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  return OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
            << " of element size " << ElementSize << " bits in addrspace "
            << AddrSpace;
}
// A Chain is a sequence of accesses at known, constant byte offsets from the
// chain's first ("leader") instruction.
struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
  ChainElem(Instruction *Inst, APInt OffsetFromLeader)
      : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
};
using Chain = SmallVector<ChainElem, 1>;
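// For illustration: four consecutive i32 loads from a common leader pointer
// would form a single chain whose OffsetFromLeader values are {0, 4, 8, 12}.
// Offsets are APInts so they can be computed at the index width of the
// pointer's address space (see gatherChains below, which seeds each chain
// with a zero offset of DL.getIndexSizeInBits(AS) bits).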
/// Sorts the chain into the order in which its instructions appear in the BB.
void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}

/// Sorts the chain by offset from the leader, breaking ties by BB order.
void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst); // stable tiebreaker
  });
}

[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C) {
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
  }
}

using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

// FIXME: Assuming stack alignment of 4 is always good enough.
constexpr unsigned StackAdjustedAlignment = 4;

Instruction *propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.emplace_back(E.Inst);
  return propagateMetadata(I, Values);
}

bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}
/// Reorders the instructions that I depends on (the instructions defining its
/// operands), to ensure they dominate I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.emplace_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int Idx = 0; Idx < NumOperands; Idx++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.emplace_back(IM);
      }
    }
  }

  // All instructions to move should follow I. Start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.contains(IM))
      continue;
    IM->moveBefore(I->getIterator());
  }
}
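// Why reorder() is needed, on a hypothetical example: if the chain's second
// load is vectorized into a wide load placed at the position of the first
// load, but the second load's GEP was defined between the two, the GEP (and,
// transitively, its operands) must be moved above the new vector load, or the
// IR would use a value before defining it.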
class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

  // Scalar loads/stores replaced by vectorized accesses, to be erased at the
  // end of run().
  SmallVector<Instruction *, 128> ToErase;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  static const unsigned MaxDepth = 3;

  /// Runs the vectorizer on one barrier-free range of instructions.
  bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);

  /// Runs the vectorizer on one equivalence class.
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);

  /// Runs the vectorizer on one chain.
  bool runOnChain(Chain &C);

  /// Splits the chain into subchains which read/write a contiguous block of
  /// memory. Discards any length-1 subchains.
  std::vector<Chain> splitChainByContiguity(Chain &C);

  /// Splits the chain into subchains within which it's safe to hoist loads up
  /// to the beginning (resp. sink stores down to the end) of the subchain.
  /// Discards any length-1 subchains.
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

  /// Splits the chain into subchains that make legal, aligned accesses.
  /// Discards any length-1 subchains.
  std::vector<Chain> splitChainByAlignment(Chain &C);

  /// Converts the instrs in the chain into a single vectorized load or store.
  /// Adds the old scalar loads/stores to ToErase.
  bool vectorizeChain(Chain &C);

  /// Tries to compute the offset in bytes PtrB - PtrA.
  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  /// Gets the element type of the vector that the chain will load or store.
  Type *getChainElemTy(const Chain &C);

  /// Determines whether ChainElem can be moved up (if IsLoadChain) or down
  /// (if not IsLoadChain) to ChainBegin, i.e. whether there are intervening
  /// may-alias instructions.
  template <bool IsLoadChain>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
      BatchAAResults &BatchAA);

  /// Merges equivalence classes whose underlying objects differ by one level
  /// of indirection.
  void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;

  /// Collects loads and stores grouped by equivalence class.
  EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
                                                BasicBlock::iterator End);

  /// Partitions Instrs into chains where every instruction has a known
  /// constant offset from the first instruction in the chain.
  std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);
};
class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
    initializeLoadStoreVectorizerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}
bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}
PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();

  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);

  bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return Changed ? PA : PreservedAnalyses::all();
}
bool Vectorizer::run() {
  bool Changed = false;

  // Break up each BB at any instruction which isn't guaranteed to transfer
  // execution to its successor; each barrier-free range is a "pseudo-BB" over
  // which accesses may be reordered, subject to aliasing.
  for (BasicBlock *BB : post_order(&F)) {
    assert(!BB->empty()); // BB must at least have a terminator.

    SmallVector<BasicBlock::iterator, 8> Barriers;
    Barriers.emplace_back(BB->begin());
    for (Instruction &I : *BB)
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
        Barriers.emplace_back(I.getIterator());
    Barriers.emplace_back(BB->end());

    for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
         ++It)
      Changed |= runOnPseudoBB(*It, *std::next(It));

    for (Instruction *I : ToErase) {
      auto *PtrOperand = getLoadStorePointerOperand(I);
      if (I->use_empty())
        I->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
    ToErase.clear();
  }

  return Changed;
}
bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
  return Changed;
}
bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}
bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize them. (Each split is at worst O(n lg n) in the number of
  // instructions and doesn't change the instruction count, so the whole nest
  // stays O(n lg n).)
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}
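// End-to-end effect on a toy chain (illustrative IR, not taken from this
// file):
//   %p1 = getelementptr i32, ptr %p, i64 1
//   %x0 = load i32, ptr %p, align 8
//   %x1 = load i32, ptr %p1, align 4
// becomes, when every split stage keeps the chain intact:
//   %v  = load <2 x i32>, ptr %p, align 8
//   %x0 = extractelement <2 x i32> %v, i32 0
//   %x1 = extractelement <2 x i32> %v, i32 1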
std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // We know that elements in the chain with nonoverlapping offsets can't
  // alias, but AA may not be smart enough to figure this out. Use a
  // hashtable.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // Across a single invocation of this function the IR is not changing, so
  // using a batched Alias Analysis is safe and can reduce compile time.
  BatchAAResults BatchAA(AA);

  // Loads get hoisted up to the first load in the chain; stores get sunk down
  // to the last store. Take the first element as the start of a new chain,
  // then greedily extend it while no intervening may-alias instruction blocks
  // the move; otherwise start a new chain. IsLoad is an std::bool_constant.
  auto Impl = [&](auto IsLoad) {
    // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.emplace_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
      if (isSafeToMove<IsLoad()>(ChainIt->Inst, NewChain.front().Inst,
                                 ChainOffsets, BatchAA)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.emplace_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.emplace_back(std::move(NewChain));
        }
        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.emplace_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(std::bool_constant<true>());

  assert(isa<StoreInst>(C[0].Inst));
  return Impl(std::bool_constant<false>());
}
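// Direction matters above: loads are merged upward (they will be hoisted to
// the first load), so the scan runs forward from the chain head; stores are
// merged downward (they will be sunk to the last store), so the same
// algorithm runs over the reversed chain. Instantiating Impl with
// std::bool_constant keeps the iterator type a compile-time choice.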
std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    // `Prev` accesses offsets [PrevOffset, PrevReadEnd).
    auto &CurChain = Ret.back();
    const ChainElem &Prev = CurChain.back();
    unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*Prev.Inst));
    assert(SzBits % 8 == 0 && "Non-byte sizes should have been filtered out by "
                              "collectEquivalenceClasses");
    APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;

    // Add this instruction to the end of the current chain, or start a new
    // one.
    bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
    LLVM_DEBUG(dbgs() << "LSV: Instructions are "
                      << (AreContiguous ? "" : "not ") << "contiguous: "
                      << *Prev.Inst << " (ends at offset " << PrevReadEnd
                      << ") -> " << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");
    if (AreContiguous)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
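// Worked example, assuming i32 elements: offsets {0, 4, 8, 16} split into
// {0, 4, 8} and {16}, because the access at offset 8 ends at byte 12 while
// the next one starts at byte 16. The singleton {16} is then discarded by the
// erase_if above.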
Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // The rules are:
  //  - If there are any pointer types in the chain, use an integer type.
  //  - Prefer an integer type if one appears in the chain.
  //  - Otherwise, use the first type in the chain.
  //
  // The pointer rule is a simplification: merging e.g. a load of a ptr with a
  // load of a double has no direct conversion; it requires a ptrtoint
  // followed by a bitcast.
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  for (const ChainElem &E : C)
    if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
      return T;
  return getLoadStoreType(C[0].Inst)->getScalarType();
}
std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // Greedy algorithm: for each CBegin, find the longest candidate subchain
  // [CBegin, CEnd] that fits in a vector register and is allowed and fast at
  // its alignment; on success, continue after CEnd.
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt Sz = C[CEnd].OffsetFromLeader +
                 DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst)) -
                 C[CBegin].OffsetFromLeader;
      if (Sz.sgt(VecRegBytes))
        break;
      CandidateChains.emplace_back(CEnd,
                                   static_cast<unsigned>(Sz.getLimitedValue()));
    }

    // Consider the longest candidate first.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte. For
      // example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this case
      // VecElemTy would be i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
                                          VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }

      // Is a load/store with this alignment allowed by TTI and at least as
      // fast as an unvectorized load/store?
      //
      // TTI and F are passed as explicit captures to work around an MSVC
      // misparse.
      auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI = TTI,
                               &F = F](Align Alignment) {
        if (Alignment.value() % SizeBytes == 0)
          return true;
        unsigned VectorizedSpeed = 0;
        bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
            F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
        if (!AllowsMisaligned) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " is misaligned, and therefore can't be vectorized.\n");
          return false;
        }

        unsigned ElementwiseSpeed = 0;
        (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
                                             Alignment, &ElementwiseSpeed);
        if (VectorizedSpeed < ElementwiseSpeed) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " has relative speed " << VectorizedSpeed
                     << ", which is lower than the elementwise speed of "
                     << ElementwiseSpeed
                     << ". Therefore this access won't be vectorized.\n");
          return false;
        }
        return true;
      };

      // If we're loading/storing from an alloca, align it if possible.
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          IsAllowedAndFast(PrefAlign)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitChainByAlignment upgrading alloca alignment "
                        "from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }

      if (!IsAllowedAndFast(Alignment)) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because its alignment is not AllowedAndFast: "
                   << Alignment.value() << "\n");
        continue;
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.\n");
        continue;
      }

      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.emplace_back(C[I]);
      CBegin = CEnd; // Skip over the instructions we've added to a chain.
      break;
    }
  }
  return Ret;
}
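// A worked size computation, assuming a 128-bit vector register (VecRegBytes
// = 16) and i32 elements: VecElemBits = 32, so VF = 8 * 16 / 32 = 4, and a
// candidate covering 16 bytes gives NumVecElems = 8 * 16 / 32 = 4, i.e. a
// <4 x i32> access -- kept only if TTI accepts the vector factor and the
// alignment checks above pass.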
bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned ChainBytes = std::accumulate(
      C.begin(), C.end(), 0u, [&](unsigned Bytes, const ChainElem &E) {
        return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));
      });
  assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0);
  // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be
  // smaller than 1 byte (e.g. VecTy == <32 x i1>).
  Type *VecTy = FixedVectorType::get(
      VecElemTy, 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy));

  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is a load/store of an alloca, we might have upgraded the alloca's
  // alignment earlier. Get the new alignment.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
  }

  // All elements of the chain must have the same scalar-type size.
#ifndef NDEBUG
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif

  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain. We
    // may also need to hoist the (transitive) operands of the loads.
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedLoad(VecTy,
                                        getLoadStorePointerOperand(C[0].Inst),
                                        Alignment);

    unsigned VecIdx = 0;
    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      Value *V;
      Type *T = getLoadStoreType(I);
      if (auto *VT = dyn_cast<FixedVectorType>(T)) {
        auto Mask = llvm::to_vector<8>(
            llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
        V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
        VecIdx += VT->getNumElements();
      } else {
        V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
                                         I->getName());
        ++VecIdx;
      }
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
    }

    // Finally, reorder the instrs in the BB so that the (transitive) operands
    // of VecInst appear before it.
    reorder(VecInst);
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(llvm::max_element(C, [](auto &A, auto &B) {
                             return A.Inst->comesBefore(B.Inst);
                           })->Inst);

    // Build the vector to store.
    Value *Vec = PoisonValue::get(VecTy);
    unsigned VecIdx = 0;
    auto InsertElem = [&](Value *V) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      if (FixedVectorType *VT =
              dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)));
        }
      } else {
        InsertElem(I->getValueOperand());
      }
    }

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedStore(
        Vec, getLoadStorePointerOperand(C[0].Inst), Alignment);
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.emplace_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
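// Note the asymmetry between the two branches above: for loads, one wide load
// is created at the earliest load's position and the scalars are rebuilt with
// extractelement/shufflevector; for stores, a vector value is assembled with
// insertelement (extractelement for vector-typed operands) and a single wide
// store is emitted at the latest store's position.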
template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
    BatchAAResults &BatchAA) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  if (ChainElem == ChainBegin)
    return true;

  // Scan the instructions between ChainElem and ChainBegin: forwards over the
  // reversed BB when hoisting a load, backwards over the BB when sinking a
  // store.
  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return BasicBlock::iterator(ChainElem->getIterator());
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return BasicBlock::iterator(ChainBegin->getIterator());
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));

  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // Loads can always be reordered with other loads.
    if (IsLoadChain && isa<LoadInst>(I))
      continue;

    // Stores can be reordered with loads from invariant memory; by definition
    // such memory is never written, so the store cannot alias it.
    if (!IsLoadChain && isInvariantLoad(I))
      continue;

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // checking what offset it accesses; this may be more precise than AA.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      // I and ChainElem overlap if:
      //   - they have the same offset, OR
      //   - one's offset is smaller, but it touches past the beginning of the
      //     other.
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false; // We found an aliasing instruction; bail.
      }

      continue; // We're confident there's no alias.
    }

    LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
    ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
                        << "  Aliasing instruction:\n"
                        << "    " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "    " << *ChainElem << '\n'
                        << "    " << *getLoadStorePointerOperand(ChainElem)
                        << '\n');
      return false;
    }
  }
  return true;
}
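// The in-chain overlap test above is plain interval arithmetic: with byte
// intervals [IOffset, IOffset + IElemSize) and [ChainElemOffset,
// ChainElemOffset + ChainElemSize), the accesses collide exactly when each
// starts before the other ends. E.g. an i64 at offset 0 and an i32 at offset
// 4 overlap because 0 <= 4 and 0 + 8 > 4.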
static bool checkNoWrapFlags(Instruction *I, bool Signed) {
  BinaryOperator *BinOpI = cast<BinaryOperator>(I);
  return (Signed && BinOpI->hasNoSignedWrap()) ||
         (!Signed && BinOpI->hasNoUnsignedWrap());
}

static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");
  // If both adds have NSW/NUW and share one operand, the transformation is
  // safe if we can prove that adding IdxDiff to the other operand of AddOpA
  // won't overflow.
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw c)` and
    // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstValA =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      int64_t CstValB =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
          IdxDiff.getSExtValue() == (CstValB - CstValA))
        return true;
    }
  }
  return false;
}
std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");

  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;

  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added to
  // ValA, we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check for eligible add NSW/NUW instruction sequences.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();

  // Third attempt: prove via known bits that adding IdxDiff to ValA cannot
  // overflow, i.e. every bit the addition could set is known zero.
  if (!Safe) {
    // Use the GEPs as context instructions, since they are likely in the same
    // BB as the load/store.
    KnownBits Known(BitWidth);
    computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, &AC, ContextInst,
                     &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    Safe = BitsAllowedToBeSet.uge(IdxDiff.abs());
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}
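// The final multiply converts an index-space difference into bytes: if the
// two GEPs index an i32 array and their last indices provably differ by
// IdxDiff = 2, the byte offset is IdxDiff * Stride = 2 * 4 = 8.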
std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff = getConstantOffset(
          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
      if (!TrueDiff)
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}
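// Example of why both arms must agree: for PtrA = select(c, p, q) and
// PtrB = select(c, p+4, q+4), the offset is 4 whichever way c resolves, so 4
// is returned. If the true arms differed by 4 but the false arms by 8, no
// single constant offset would exist and std::nullopt is the right answer.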
void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
  if (EQClasses.size() < 2) // There is nothing to merge.
    return;

  // The reduced key has all elements of EqClassKey except the underlying
  // object. Check that EqClassKey has 4 elements and define the reduced key.
  static_assert(std::tuple_size_v<EqClassKey> == 4,
                "EqClassKey has changed - EqClassReducedKey needs changes too");
  using EqClassReducedKey =
      std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
                 std::tuple_element_t<2, EqClassKey> /* Element size */,
                 std::tuple_element_t<3, EqClassKey> /* IsLoad */>;
  using ECReducedKeyToUnderlyingObjectMap =
      MapVector<EqClassReducedKey,
                SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;

  // Map each reduced key to its underlying objects; a reduced key with more
  // than one underlying object marks a group of potentially mergeable
  // equivalence classes.
  ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
  bool FoundPotentiallyOptimizableEC = false;
  for (const auto &EC : EQClasses) {
    const auto &Key = EC.first;
    EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
                             std::get<3>(Key)};
    auto &UOMap = RedKeyToUOMap[RedKey];
    UOMap.insert(std::get<0>(Key));
    if (UOMap.size() > 1)
      FoundPotentiallyOptimizableEC = true;
  }
  if (!FoundPotentiallyOptimizableEC)
    return;

  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
    for (const auto &RedKeyToUO : RedKeyToUOMap) {
      dbgs() << "  Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
             << std::get<1>(RedKeyToUO.first) << ", "
             << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
             << RedKeyToUO.second.size() << " underlying objects:\n";
      for (auto UObject : RedKeyToUO.second)
        dbgs() << "    " << *UObject << '\n';
    }
  });

  using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;

  // Compute the ultimate targets for a set of underlying objects.
  auto GetUltimateTargets =
      [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
    UObjectToUObjectMap IndirectionMap;
    for (const auto *UObject : UObjects) {
      const unsigned MaxLookupDepth = 1; // Look for 1-level indirections only.
      const auto *UltimateTarget = getUnderlyingObject(UObject, MaxLookupDepth);
      if (UltimateTarget != UObject)
        IndirectionMap[UObject] = UltimateTarget;
    }
    UObjectToUObjectMap UltimateTargetsMap;
    for (const auto *UObject : UObjects) {
      const auto *Target = UObject;
      auto It = IndirectionMap.find(Target);
      for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
        Target = It->second;
      UltimateTargetsMap[UObject] = Target;
    }
    return UltimateTargetsMap;
  };

  // For each group with more than one underlying object, merge classes that
  // resolve to the same ultimate target.
  for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
    if (UObjects.size() < 2)
      continue;
    auto UTMap = GetUltimateTargets(UObjects);
    for (const auto &[UObject, UltimateTarget] : UTMap) {
      if (UObject == UltimateTarget)
        continue;

      EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
                         std::get<2>(RedKey)};
      EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
                       std::get<2>(RedKey)};
      // The entry for KeyFrom is guaranteed to exist, unlike KeyTo. Thus,
      // request the reference to the instructions vector for KeyTo first.
      const auto &VecTo = EQClasses[KeyTo];
      const auto &VecFrom = EQClasses[KeyFrom];
      SmallVector<Instruction *, 8> MergedVec;
      std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
                 std::back_inserter(MergedVec),
                 [](Instruction *A, Instruction *B) {
                   return A && B && A->comesBefore(B);
                 });
      EQClasses[KeyTo] = std::move(MergedVec);
      EQClasses.erase(KeyFrom);
    }
  }
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
}
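// Illustrative merge, under the one-level-lookup rule above: if one class is
// keyed on %gep (a getelementptr whose base is %alloca) and another on
// %alloca itself, with matching address space, element size, and
// load-vs-store, the %gep class is folded into the %alloca class -- so
// accesses that differ only in how far getUnderlyingObject happened to look
// through the pointer are considered together.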
EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
      // Distinct selects with the same condition may still evaluate to
      // consecutive pointers; group such accesses by the condition so they
      // can still be checked for contiguity.
      return Sel->getCondition();
    }
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;

    if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||
        (SI && !TTI.isLegalToVectorizeStore(SI)))
      continue;

    Type *Ty = getLoadStoreType(&I);
    if (!VectorType::isValidElementType(Ty->getScalarType()))
      continue;

    // Skip weird non-byte sizes. They probably aren't worth the effort of
    // handling correctly.
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;

    // Skip vectors of pointers; the vectorized load/store uses an integer
    // type, and there is no cast between an integer and a vector of pointers.
    if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
      continue;

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two sized elements.
    if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
        (VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .emplace_back(&I);
  }

  mergeEquivalenceClasses(Ret);
  return Ret;
}
std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order and all have the same addr space.
  for (size_t I = 1; I < Instrs.size(); ++I) {
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
    assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
  }
#endif

  // Machinery to build an MRU-hashtable of Chains.
  //
  // (Ideally this could be done with MapVector, but as currently implemented,
  // moving an element to the front of a MapVector is O(n).)
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;

  // Compare each instruction in `Instrs` to the leaders of the N most
  // recently-used chains. This bounds the quadratic behavior of the pass
  // while still allowing arbitrarily long chains.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      if (std::optional<APInt> Offset = getConstantOffset(
              getLoadStorePointerOperand(ChainIter->first),
              getLoadStorePointerOperand(I),
              /*ContextInst=*/
              (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
        ChainIter->second.emplace_back(I, Offset.value());
        // Move this chain to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.emplace_back(I, ZeroOffset);
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains so the order is deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.emplace_back(std::move(E.second));
  return Ret;
}
std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst=" << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  // The accumulated offsets must fit in the (possibly narrower) width of the
  // stripped pointers.
  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute PtrB - PtrA with SCEV.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      // Handle index width (the width of Dist) != pointer width (the width of
      // the Offset*s at this point).
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }
  if (std::optional<APInt> Diff =
          getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}
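// In short, three strategies are tried in order: cheap syntactic accumulation
// of in-bounds constant GEP offsets, asking SCEV whether PtrB - PtrA folds to
// a single constant, and finally the getConstantOffsetComplexAddrs /
// getConstantOffsetSelects rewrites above -- with every result normalized
// back to OrigBitWidth.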