34#define DEBUG_TYPE "si-memory-legalizer"
35#define PASS_NAME "SI Memory Legalizer"
39 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
61enum class SIAtomicScope {
73enum class SIAtomicAddrSpace {
82 FLAT = GLOBAL |
LDS | SCRATCH,
85 ATOMIC = GLOBAL |
LDS | SCRATCH | GDS,
88 ALL = GLOBAL |
LDS | SCRATCH | GDS | OTHER,
// Describes one memory operation for the memory legalizer: its atomic
// ordering(s), synchronization scope, the address spaces it orders and
// accesses, and several boolean properties (volatile, non-temporal, last-use,
// cooperative). SIMemOpAccess is a friend of this class.
// NOTE(review): this listing omits lines of the class (the original line
// numbers jump), so the comments below describe only the visible statements.
93class SIMemOpInfo final {
96 friend class SIMemOpAccess;
// In-class defaults for the data members. The constructor's default arguments
// further down differ for several of them (OrderingAddrSpace, InstrAddrSpace,
// IsCrossAddressSpaceOrdering).
100 SIAtomicScope Scope = SIAtomicScope::SYSTEM;
101 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
102 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
103 bool IsCrossAddressSpaceOrdering =
false;
104 bool IsVolatile =
false;
105 bool IsNonTemporal =
false;
106 bool IsLastUse =
false;
107 bool IsCooperative =
false;
// Constructor parameters (the line opening the parameter list and the
// Ordering parameter are not part of this listing). Only the subtarget has no
// default among the visible parameters.
110 const GCNSubtarget &ST,
112 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
113 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
114 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
115 bool IsCrossAddressSpaceOrdering =
true,
116 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
117 bool IsVolatile =
false,
bool IsNonTemporal =
false,
118 bool IsLastUse =
false,
bool IsCooperative =
false)
119 : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
120 OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
121 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
122 IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
123 IsLastUse(IsLastUse), IsCooperative(IsCooperative) {
// A non-atomic operation must carry no atomic attributes: it may not be
// cooperative, and its scope, ordering address space, cross-address-space
// flag and failure ordering must all be the "none" values.
125 if (Ordering == AtomicOrdering::NotAtomic) {
126 assert(!IsCooperative &&
"Cannot be cooperative & non-atomic!");
127 assert(Scope == SIAtomicScope::NONE &&
128 OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
129 !IsCrossAddressSpaceOrdering &&
130 FailureOrdering == AtomicOrdering::NotAtomic);
// Requires a scope to be set and both address-space masks to intersect ATOMIC.
// NOTE(review): presumably this applies to the atomic case only, i.e. the
// elided lines above end the non-atomic branch — confirm against the full file.
134 assert(Scope != SIAtomicScope::NONE &&
135 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
136 SIAtomicAddrSpace::NONE &&
137 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
138 SIAtomicAddrSpace::NONE);
// Drops cross-address-space ordering when the ordering and instruction
// address spaces are equal and a second condition holds (that second operand
// of the && is on a line not shown here).
143 if ((OrderingAddrSpace == InstrAddrSpace) &&
145 this->IsCrossAddressSpaceOrdering =
false;
// Limit the scope according to which address spaces the instruction accesses.
// Each branch tests that InstrAddrSpace has no bits outside a given set and
// then takes std::min of the requested scope and a fixed scope:
//   only SCRATCH               -> min(Scope, SINGLETHREAD)
//   only SCRATCH / LDS         -> min(Scope, WORKGROUP)
//   only SCRATCH / LDS / GDS   -> min(Scope, AGENT)
// std::min selects the lower-valued enumerator, so this relies on the
// declaration order of SIAtomicScope (not visible in this listing).
149 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
150 SIAtomicAddrSpace::NONE) {
151 this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
152 }
else if ((InstrAddrSpace &
153 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
154 SIAtomicAddrSpace::NONE) {
155 this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
156 }
else if ((InstrAddrSpace &
157 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
158 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
159 this->Scope = std::min(Scope, SIAtomicScope::AGENT);
// CLUSTER scope is replaced by AGENT when the subtarget reports no clusters.
164 if (this->Scope == SIAtomicScope::CLUSTER && !
ST.hasClusters())
165 this->Scope = SIAtomicScope::AGENT;
// Read-only accessors for the fields above.
// NOTE(review): lines are elided between some accessor headers and bodies in
// this listing; e.g. the `return FailureOrdering;` directly below the
// getScope() header presumably belongs to a different accessor whose header
// is not shown, and isVolatile()'s body is not shown at all.
171 SIAtomicScope getScope()
const {
184 return FailureOrdering;
189 SIAtomicAddrSpace getInstrAddrSpace()
const {
190 return InstrAddrSpace;
195 SIAtomicAddrSpace getOrderingAddrSpace()
const {
196 return OrderingAddrSpace;
201 bool getIsCrossAddressSpaceOrdering()
const {
202 return IsCrossAddressSpaceOrdering;
207 bool isVolatile()
const {
213 bool isNonTemporal()
const {
214 return IsNonTemporal;
219 bool isLastUse()
const {
return IsLastUse; }
222 bool isCooperative()
const {
return IsCooperative; }
// True when Ordering is anything other than NotAtomic.
226 bool isAtomic()
const {
227 return Ordering != AtomicOrdering::NotAtomic;
232class SIMemOpAccess final {
234 const AMDGPUMachineModuleInfo *MMI =
nullptr;
235 const GCNSubtarget &ST;
239 const char *Msg)
const;
245 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
246 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
249 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
253 std::optional<SIMemOpInfo>
259 SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI,
const GCNSubtarget &ST);
262 std::optional<SIMemOpInfo>
267 std::optional<SIMemOpInfo>
272 std::optional<SIMemOpInfo>
277 std::optional<SIMemOpInfo>
281class SICacheControl {
285 const GCNSubtarget &ST;
288 const SIInstrInfo *TII =
nullptr;
295 SICacheControl(
const GCNSubtarget &ST);
305 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &ST);
312 SIAtomicAddrSpace AddrSpace)
const = 0;
319 SIAtomicAddrSpace AddrSpace)
const = 0;
326 SIAtomicAddrSpace AddrSpace)
const = 0;
332 SIAtomicAddrSpace AddrSpace,
333 SIMemOp
Op,
bool IsVolatile,
335 bool IsLastUse =
false)
const = 0;
337 virtual bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
342 virtual bool handleCooperativeAtomic(MachineInstr &
MI)
const {
344 "cooperative atomics are not available on this architecture");
355 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
356 bool IsCrossAddrSpaceOrdering, Position Pos,
366 SIAtomicAddrSpace AddrSpace,
367 Position Pos)
const = 0;
377 SIAtomicAddrSpace AddrSpace,
378 bool IsCrossAddrSpaceOrdering,
379 Position Pos)
const = 0;
388 virtual ~SICacheControl() =
default;
391class SIGfx6CacheControl :
public SICacheControl {
408 SIGfx6CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
412 SIAtomicAddrSpace AddrSpace)
const override;
416 SIAtomicAddrSpace AddrSpace)
const override;
420 SIAtomicAddrSpace AddrSpace)
const override;
423 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
424 bool IsVolatile,
bool IsNonTemporal,
425 bool IsLastUse)
const override;
428 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
429 bool IsCrossAddrSpaceOrdering, Position Pos,
434 SIAtomicAddrSpace AddrSpace,
435 Position Pos)
const override;
439 SIAtomicAddrSpace AddrSpace,
440 bool IsCrossAddrSpaceOrdering,
441 Position Pos)
const override;
444class SIGfx7CacheControl :
public SIGfx6CacheControl {
447 SIGfx7CacheControl(
const GCNSubtarget &ST) : SIGfx6CacheControl(
ST) {}
451 SIAtomicAddrSpace AddrSpace,
452 Position Pos)
const override;
456class SIGfx90ACacheControl :
public SIGfx7CacheControl {
459 SIGfx90ACacheControl(
const GCNSubtarget &ST) : SIGfx7CacheControl(
ST) {}
463 SIAtomicAddrSpace AddrSpace)
const override;
467 SIAtomicAddrSpace AddrSpace)
const override;
470 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
471 bool IsVolatile,
bool IsNonTemporal,
472 bool IsLastUse)
const override;
475 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
476 bool IsCrossAddrSpaceOrdering, Position Pos,
481 SIAtomicAddrSpace AddrSpace,
482 Position Pos)
const override;
486 SIAtomicAddrSpace AddrSpace,
487 bool IsCrossAddrSpaceOrdering,
488 Position Pos)
const override;
491class SIGfx940CacheControl :
public SIGfx90ACacheControl {
513 SIGfx940CacheControl(
const GCNSubtarget &ST) : SIGfx90ACacheControl(
ST) {};
517 SIAtomicAddrSpace AddrSpace)
const override;
521 SIAtomicAddrSpace AddrSpace)
const override;
525 SIAtomicAddrSpace AddrSpace)
const override;
528 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
529 bool IsVolatile,
bool IsNonTemporal,
530 bool IsLastUse)
const override;
533 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
536 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
537 Position Pos)
const override;
540class SIGfx10CacheControl :
public SIGfx7CacheControl {
551 SIGfx10CacheControl(
const GCNSubtarget &ST) : SIGfx7CacheControl(
ST) {}
555 SIAtomicAddrSpace AddrSpace)
const override;
558 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
559 bool IsVolatile,
bool IsNonTemporal,
560 bool IsLastUse)
const override;
563 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
564 bool IsCrossAddrSpaceOrdering, Position Pos,
569 SIAtomicAddrSpace AddrSpace,
570 Position Pos)
const override;
575class SIGfx11CacheControl :
public SIGfx10CacheControl {
577 SIGfx11CacheControl(
const GCNSubtarget &ST) : SIGfx10CacheControl(
ST) {}
581 SIAtomicAddrSpace AddrSpace)
const override;
584 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
585 bool IsVolatile,
bool IsNonTemporal,
586 bool IsLastUse)
const override;
589class SIGfx12CacheControl :
public SIGfx11CacheControl {
610 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const;
613 SIGfx12CacheControl(
const GCNSubtarget &ST) : SIGfx11CacheControl(
ST) {
616 assert(!
ST.hasGFX1250Insts() ||
ST.isCuModeEnabled());
620 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
621 bool IsCrossAddrSpaceOrdering, Position Pos,
625 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
628 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
629 bool IsVolatile,
bool IsNonTemporal,
630 bool IsLastUse)
const override;
632 bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const override;
634 virtual bool handleCooperativeAtomic(MachineInstr &
MI)
const override;
637 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
638 Position Pos)
const override;
642 SIAtomicAddrSpace AddrSpace)
const override {
643 return setAtomicScope(
MI, Scope, AddrSpace);
648 SIAtomicAddrSpace AddrSpace)
const override {
649 return setAtomicScope(
MI, Scope, AddrSpace);
654 SIAtomicAddrSpace AddrSpace)
const override {
655 return setAtomicScope(
MI, Scope, AddrSpace);
659class SIMemoryLegalizer final {
661 const MachineModuleInfo &MMI;
663 std::unique_ptr<SICacheControl> CC =
nullptr;
666 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
670 bool isAtomicRet(
const MachineInstr &
MI)
const {
676 bool removeAtomicPseudoMIs();
680 bool expandLoad(
const SIMemOpInfo &MOI,
684 bool expandStore(
const SIMemOpInfo &MOI,
688 bool expandAtomicFence(
const SIMemOpInfo &MOI,
692 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
696 SIMemoryLegalizer(
const MachineModuleInfo &MMI) : MMI(MMI) {};
697 bool run(MachineFunction &MF);
704 SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}
706 void getAnalysisUsage(AnalysisUsage &AU)
const override {
711 StringRef getPassName()
const override {
715 bool runOnMachineFunction(MachineFunction &MF)
override;
719 {
"global", SIAtomicAddrSpace::GLOBAL},
720 {
"local", SIAtomicAddrSpace::LDS},
728 OS <<
"unknown address space '" << AS <<
"'; expected one of ";
730 for (
const auto &[Name, Val] : ASNames)
731 OS <<
LS <<
'\'' <<
Name <<
'\'';
739static std::optional<SIAtomicAddrSpace>
741 static constexpr StringLiteral FenceASPrefix =
"amdgpu-synchronize-as";
747 SIAtomicAddrSpace
Result = SIAtomicAddrSpace::NONE;
748 for (
const auto &[Prefix, Suffix] : MMRA) {
749 if (Prefix != FenceASPrefix)
752 if (
auto It = ASNames.find(Suffix); It != ASNames.end())
755 diagnoseUnknownMMRAASName(
MI, Suffix);
758 if (Result == SIAtomicAddrSpace::NONE)
767 const char *Msg)
const {
768 const Function &
Func =
MI->getParent()->getParent()->getFunction();
769 Func.getContext().diagnose(
770 DiagnosticInfoUnsupported(Func, Msg,
MI->getDebugLoc()));
773std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
775 SIAtomicAddrSpace InstrAddrSpace)
const {
777 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
779 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
781 return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC,
true);
783 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
786 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
789 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
792 return std::tuple(SIAtomicScope::SYSTEM,
793 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
795 return std::tuple(SIAtomicScope::AGENT,
796 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
798 return std::tuple(SIAtomicScope::CLUSTER,
799 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
801 return std::tuple(SIAtomicScope::WORKGROUP,
802 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
804 return std::tuple(SIAtomicScope::WAVEFRONT,
805 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
807 return std::tuple(SIAtomicScope::SINGLETHREAD,
808 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
812SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
814 return SIAtomicAddrSpace::FLAT;
816 return SIAtomicAddrSpace::GLOBAL;
818 return SIAtomicAddrSpace::LDS;
820 return SIAtomicAddrSpace::SCRATCH;
822 return SIAtomicAddrSpace::GDS;
824 return SIAtomicAddrSpace::OTHER;
// Constructs the accessor from the AMDGPU machine module info and the
// subtarget. The module info is stored by address in the MMI pointer member;
// ST is initialised directly from the subtarget reference. The body is empty.
827SIMemOpAccess::SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI_,
828 const GCNSubtarget &ST)
829 : MMI(&MMI_),
ST(
ST) {}
// Builds a SIMemOpInfo for an instruction that has at least one memory
// operand by folding properties of all its memory operands together, then
// translating the resulting sync scope into a scope / ordering address space.
// NOTE(review): many lines of this function are elided in this listing
// (declarations of Ordering, FailureOrdering, SSID, IsVolatile, OpOrdering,
// and the early returns after each reportUnsupported call are not shown);
// the comments below cover only the visible statements.
831std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
833 assert(
MI->getNumMemOperands() > 0);
// Accumulators over the memory operands. IsNonTemporal starts true because it
// is AND-ed across operands below; IsLastUse starts false and is OR-ed.
838 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
839 bool IsNonTemporal =
true;
841 bool IsLastUse =
false;
842 bool IsCooperative =
false;
846 for (
const auto &MMO :
MI->memoperands()) {
// Non-temporal only if every memory operand is non-temporal.
847 IsNonTemporal &= MMO->isNonTemporal();
// Last-use if any memory operand carries the MOLastUse flag.
849 IsLastUse |= MMO->getFlags() &
MOLastUse;
852 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
// Atomic operands: their sync scopes must be related by inclusion, otherwise
// the instruction is reported as unsupported.
854 if (OpOrdering != AtomicOrdering::NotAtomic) {
855 const auto &IsSyncScopeInclusion =
857 if (!IsSyncScopeInclusion) {
858 reportUnsupported(
MI,
859 "Unsupported non-inclusive atomic synchronization scope");
// Keep the current SSID when the inclusion query yields true, otherwise
// switch to this operand's sync scope.
863 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
// A failure ordering may not be Release or AcquireRelease.
865 assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
866 MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
// Defaults used when the merged ordering is NotAtomic.
872 SIAtomicScope
Scope = SIAtomicScope::NONE;
873 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
874 bool IsCrossAddressSpaceOrdering =
false;
// For atomic orderings, map the sync scope to (scope, ordering address space,
// cross-address-space flag); an unmappable scope is reported as unsupported.
875 if (Ordering != AtomicOrdering::NotAtomic) {
876 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
878 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
881 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
// Reject an empty ordering address space, one with bits outside ATOMIC, or an
// instruction address space that does not intersect ATOMIC.
883 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
884 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
885 ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
886 reportUnsupported(
MI,
"Unsupported atomic address space");
// Package everything gathered above into the result.
890 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
891 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
892 IsNonTemporal, IsLastUse, IsCooperative);
895std::optional<SIMemOpInfo>
899 if (!(
MI->mayLoad() && !
MI->mayStore()))
903 if (
MI->getNumMemOperands() == 0)
904 return SIMemOpInfo(ST);
906 return constructFromMIWithMMO(
MI);
909std::optional<SIMemOpInfo>
913 if (!(!
MI->mayLoad() &&
MI->mayStore()))
917 if (
MI->getNumMemOperands() == 0)
918 return SIMemOpInfo(ST);
920 return constructFromMIWithMMO(
MI);
923std::optional<SIMemOpInfo>
927 if (
MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
934 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
936 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
940 SIAtomicScope
Scope = SIAtomicScope::NONE;
941 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
942 bool IsCrossAddressSpaceOrdering =
false;
943 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
946 if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
951 reportUnsupported(
MI,
"Unsupported atomic address space");
955 auto SynchronizeAS = getSynchronizeAddrSpaceMD(*
MI);
957 OrderingAddrSpace = *SynchronizeAS;
959 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
960 SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
961 AtomicOrdering::NotAtomic);
// Produces the SIMemOpInfo for an instruction that both may load and may
// store (the shape of a cmpxchg / read-modify-write).
// NOTE(review): the parameter list and the statement executed when the first
// guard fails are elided in this listing; presumably an empty optional is
// returned there — confirm against the full file.
964std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
968 if (!(
MI->mayLoad() &&
MI->mayStore()))
// With no memory operands there is nothing to inspect, so the info is built
// from the subtarget alone, i.e. with the SIMemOpInfo constructor defaults.
972 if (
MI->getNumMemOperands() == 0)
973 return SIMemOpInfo(ST);
// Otherwise derive the info from the instruction's memory operands.
975 return constructFromMIWithMMO(
MI);
// Base-class constructor: keeps the subtarget and caches its instruction info
// in TII so later code can build instructions and query named operands.
// NOTE(review): the rest of the body is not part of this listing.
978SICacheControl::SICacheControl(
const GCNSubtarget &ST) :
ST(
ST) {
979 TII =
ST.getInstrInfo();
986 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, AMDGPU::OpName::cpol);
// Factory that returns the SICacheControl implementation for a subtarget.
//
// The order of the checks matters: the GFX940 and GFX90A instruction-set
// queries are tested first, and only when neither applies is the choice made
// from the subtarget generation. The generation tests form an ascending
// chain, each one catching everything below its bound that an earlier test
// did not already claim:
//   <= SOUTHERN_ISLANDS -> SIGfx6CacheControl
//   <  GFX10            -> SIGfx7CacheControl
//   <  GFX11            -> SIGfx10CacheControl
//   <  GFX12            -> SIGfx11CacheControl
//   otherwise           -> SIGfx12CacheControl
995std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &ST) {
996 GCNSubtarget::Generation Generation =
ST.getGeneration();
// Feature-based selection takes priority over the generation comparison.
997 if (
ST.hasGFX940Insts())
998 return std::make_unique<SIGfx940CacheControl>(ST);
999 if (
ST.hasGFX90AInsts())
1000 return std::make_unique<SIGfx90ACacheControl>(ST);
// Generation-based selection, oldest first.
1001 if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
1002 return std::make_unique<SIGfx6CacheControl>(ST);
1003 if (Generation < AMDGPUSubtarget::GFX10)
1004 return std::make_unique<SIGfx7CacheControl>(ST);
1005 if (Generation < AMDGPUSubtarget::GFX11)
1006 return std::make_unique<SIGfx10CacheControl>(ST);
1007 if (Generation < AMDGPUSubtarget::GFX12)
1008 return std::make_unique<SIGfx11CacheControl>(ST);
// Everything from GFX12 upwards.
1009 return std::make_unique<SIGfx12CacheControl>(ST);
1012bool SIGfx6CacheControl::enableLoadCacheBypass(
1014 SIAtomicScope Scope,
1015 SIAtomicAddrSpace AddrSpace)
const {
1019 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1021 case SIAtomicScope::SYSTEM:
1022 case SIAtomicScope::AGENT:
1027 case SIAtomicScope::WORKGROUP:
1028 case SIAtomicScope::WAVEFRONT:
1029 case SIAtomicScope::SINGLETHREAD:
1047bool SIGfx6CacheControl::enableStoreCacheBypass(
1049 SIAtomicScope Scope,
1050 SIAtomicAddrSpace AddrSpace)
const {
1060bool SIGfx6CacheControl::enableRMWCacheBypass(
1062 SIAtomicScope Scope,
1063 SIAtomicAddrSpace AddrSpace)
const {
1075bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
1077 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1087 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1095 if (
Op == SIMemOp::LOAD)
1103 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1104 Position::AFTER, AtomicOrdering::Unordered);
1109 if (IsNonTemporal) {
1121 SIAtomicScope Scope,
1122 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1123 bool IsCrossAddrSpaceOrdering, Position Pos,
1127 MachineBasicBlock &
MBB = *
MI->getParent();
1130 if (Pos == Position::AFTER)
1134 bool LGKMCnt =
false;
1136 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1137 SIAtomicAddrSpace::NONE) {
1139 case SIAtomicScope::SYSTEM:
1140 case SIAtomicScope::AGENT:
1143 case SIAtomicScope::WORKGROUP:
1144 case SIAtomicScope::WAVEFRONT:
1145 case SIAtomicScope::SINGLETHREAD:
1154 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1156 case SIAtomicScope::SYSTEM:
1157 case SIAtomicScope::AGENT:
1158 case SIAtomicScope::WORKGROUP:
1165 LGKMCnt |= IsCrossAddrSpaceOrdering;
1167 case SIAtomicScope::WAVEFRONT:
1168 case SIAtomicScope::SINGLETHREAD:
1177 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1179 case SIAtomicScope::SYSTEM:
1180 case SIAtomicScope::AGENT:
1187 LGKMCnt |= IsCrossAddrSpaceOrdering;
1189 case SIAtomicScope::WORKGROUP:
1190 case SIAtomicScope::WAVEFRONT:
1191 case SIAtomicScope::SINGLETHREAD:
1200 if (VMCnt || LGKMCnt) {
1201 unsigned WaitCntImmediate =
1207 .
addImm(WaitCntImmediate);
1215 Scope == SIAtomicScope::WORKGROUP &&
1216 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1221 if (Pos == Position::AFTER)
1228 SIAtomicScope Scope,
1229 SIAtomicAddrSpace AddrSpace,
1230 Position Pos)
const {
1231 if (!InsertCacheInv)
1236 MachineBasicBlock &
MBB = *
MI->getParent();
1239 if (Pos == Position::AFTER)
1242 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1244 case SIAtomicScope::SYSTEM:
1245 case SIAtomicScope::AGENT:
1249 case SIAtomicScope::WORKGROUP:
1250 case SIAtomicScope::WAVEFRONT:
1251 case SIAtomicScope::SINGLETHREAD:
1266 if (Pos == Position::AFTER)
1273 SIAtomicScope Scope,
1274 SIAtomicAddrSpace AddrSpace,
1275 bool IsCrossAddrSpaceOrdering,
1276 Position Pos)
const {
1277 return insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1278 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
1282 SIAtomicScope Scope,
1283 SIAtomicAddrSpace AddrSpace,
1284 Position Pos)
const {
1285 if (!InsertCacheInv)
1290 MachineBasicBlock &
MBB = *
MI->getParent();
1296 ? AMDGPU::BUFFER_WBINVL1
1297 : AMDGPU::BUFFER_WBINVL1_VOL;
1299 if (Pos == Position::AFTER)
1302 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1304 case SIAtomicScope::SYSTEM:
1305 case SIAtomicScope::AGENT:
1309 case SIAtomicScope::WORKGROUP:
1310 case SIAtomicScope::WAVEFRONT:
1311 case SIAtomicScope::SINGLETHREAD:
1326 if (Pos == Position::AFTER)
1332bool SIGfx90ACacheControl::enableLoadCacheBypass(
1334 SIAtomicScope Scope,
1335 SIAtomicAddrSpace AddrSpace)
const {
1339 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1341 case SIAtomicScope::SYSTEM:
1342 case SIAtomicScope::AGENT:
1347 case SIAtomicScope::WORKGROUP:
1352 if (
ST.isTgSplitEnabled())
1355 case SIAtomicScope::WAVEFRONT:
1356 case SIAtomicScope::SINGLETHREAD:
1374bool SIGfx90ACacheControl::enableRMWCacheBypass(
1376 SIAtomicScope Scope,
1377 SIAtomicAddrSpace AddrSpace)
const {
1381 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1383 case SIAtomicScope::SYSTEM:
1384 case SIAtomicScope::AGENT:
1389 case SIAtomicScope::WORKGROUP:
1390 case SIAtomicScope::WAVEFRONT:
1391 case SIAtomicScope::SINGLETHREAD:
1402bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
1404 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1414 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1422 if (
Op == SIMemOp::LOAD)
1430 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1431 Position::AFTER, AtomicOrdering::Unordered);
1436 if (IsNonTemporal) {
1448 SIAtomicScope Scope,
1449 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1450 bool IsCrossAddrSpaceOrdering,
1453 if (
ST.isTgSplitEnabled()) {
1461 if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1462 SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1463 (Scope == SIAtomicScope::WORKGROUP)) {
1465 Scope = SIAtomicScope::AGENT;
1469 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1471 return SIGfx7CacheControl::insertWait(
MI, Scope, AddrSpace,
Op,
1472 IsCrossAddrSpaceOrdering, Pos, Order);
1476 SIAtomicScope Scope,
1477 SIAtomicAddrSpace AddrSpace,
1478 Position Pos)
const {
1479 if (!InsertCacheInv)
1484 MachineBasicBlock &
MBB = *
MI->getParent();
1487 if (Pos == Position::AFTER)
1490 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1492 case SIAtomicScope::SYSTEM:
1504 case SIAtomicScope::AGENT:
1507 case SIAtomicScope::WORKGROUP:
1512 if (
ST.isTgSplitEnabled()) {
1514 Scope = SIAtomicScope::AGENT;
1517 case SIAtomicScope::WAVEFRONT:
1518 case SIAtomicScope::SINGLETHREAD:
1533 if (Pos == Position::AFTER)
1536 Changed |= SIGfx7CacheControl::insertAcquire(
MI, Scope, AddrSpace, Pos);
1542 SIAtomicScope Scope,
1543 SIAtomicAddrSpace AddrSpace,
1544 bool IsCrossAddrSpaceOrdering,
1545 Position Pos)
const {
1548 MachineBasicBlock &
MBB = *
MI->getParent();
1551 if (Pos == Position::AFTER)
1554 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1556 case SIAtomicScope::SYSTEM:
1570 case SIAtomicScope::AGENT:
1571 case SIAtomicScope::WORKGROUP:
1572 case SIAtomicScope::WAVEFRONT:
1573 case SIAtomicScope::SINGLETHREAD:
1581 if (Pos == Position::AFTER)
1585 SIGfx7CacheControl::insertRelease(
MI, Scope, AddrSpace,
1586 IsCrossAddrSpaceOrdering, Pos);
1591bool SIGfx940CacheControl::enableLoadCacheBypass(
1593 SIAtomicAddrSpace AddrSpace)
const {
1597 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1599 case SIAtomicScope::SYSTEM:
1604 case SIAtomicScope::AGENT:
1608 case SIAtomicScope::WORKGROUP:
1616 case SIAtomicScope::WAVEFRONT:
1617 case SIAtomicScope::SINGLETHREAD:
1635bool SIGfx940CacheControl::enableStoreCacheBypass(
1637 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const {
1641 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1643 case SIAtomicScope::SYSTEM:
1648 case SIAtomicScope::AGENT:
1652 case SIAtomicScope::WORKGROUP:
1656 case SIAtomicScope::WAVEFRONT:
1657 case SIAtomicScope::SINGLETHREAD:
1675bool SIGfx940CacheControl::enableRMWCacheBypass(
1677 SIAtomicAddrSpace AddrSpace)
const {
1681 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1683 case SIAtomicScope::SYSTEM:
1687 case SIAtomicScope::AGENT:
1688 case SIAtomicScope::WORKGROUP:
1689 case SIAtomicScope::WAVEFRONT:
1690 case SIAtomicScope::SINGLETHREAD:
1704bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
1706 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1716 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1730 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1731 Position::AFTER, AtomicOrdering::Unordered);
1736 if (IsNonTemporal) {
1745 SIAtomicScope Scope,
1746 SIAtomicAddrSpace AddrSpace,
1747 Position Pos)
const {
1748 if (!InsertCacheInv)
1753 MachineBasicBlock &
MBB = *
MI->getParent();
1756 if (Pos == Position::AFTER)
1759 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1761 case SIAtomicScope::SYSTEM:
1775 case SIAtomicScope::AGENT:
1788 case SIAtomicScope::WORKGROUP:
1793 if (
ST.isTgSplitEnabled()) {
1807 case SIAtomicScope::WAVEFRONT:
1808 case SIAtomicScope::SINGLETHREAD:
1824 if (Pos == Position::AFTER)
1831 SIAtomicScope Scope,
1832 SIAtomicAddrSpace AddrSpace,
1833 bool IsCrossAddrSpaceOrdering,
1834 Position Pos)
const {
1837 MachineBasicBlock &
MBB = *
MI->getParent();
1840 if (Pos == Position::AFTER)
1843 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1845 case SIAtomicScope::SYSTEM:
1860 case SIAtomicScope::AGENT:
1870 case SIAtomicScope::WORKGROUP:
1871 case SIAtomicScope::WAVEFRONT:
1872 case SIAtomicScope::SINGLETHREAD:
1882 if (Pos == Position::AFTER)
1887 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1888 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
1893bool SIGfx10CacheControl::enableLoadCacheBypass(
1895 SIAtomicScope Scope,
1896 SIAtomicAddrSpace AddrSpace)
const {
1900 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1902 case SIAtomicScope::SYSTEM:
1903 case SIAtomicScope::AGENT:
1909 case SIAtomicScope::WORKGROUP:
1914 if (!
ST.isCuModeEnabled())
1917 case SIAtomicScope::WAVEFRONT:
1918 case SIAtomicScope::SINGLETHREAD:
1936bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1938 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1949 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1957 if (
Op == SIMemOp::LOAD) {
1967 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1968 Position::AFTER, AtomicOrdering::Unordered);
1972 if (IsNonTemporal) {
1977 if (
Op == SIMemOp::STORE)
1988 SIAtomicScope Scope,
1989 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1990 bool IsCrossAddrSpaceOrdering,
1994 MachineBasicBlock &
MBB = *
MI->getParent();
1997 if (Pos == Position::AFTER)
2002 bool LGKMCnt =
false;
2004 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
2005 SIAtomicAddrSpace::NONE) {
2007 case SIAtomicScope::SYSTEM:
2008 case SIAtomicScope::AGENT:
2009 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2011 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2014 case SIAtomicScope::WORKGROUP:
2020 if (!
ST.isCuModeEnabled()) {
2021 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2023 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2027 case SIAtomicScope::WAVEFRONT:
2028 case SIAtomicScope::SINGLETHREAD:
2037 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
2039 case SIAtomicScope::SYSTEM:
2040 case SIAtomicScope::AGENT:
2041 case SIAtomicScope::WORKGROUP:
2048 LGKMCnt |= IsCrossAddrSpaceOrdering;
2050 case SIAtomicScope::WAVEFRONT:
2051 case SIAtomicScope::SINGLETHREAD:
2060 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
2062 case SIAtomicScope::SYSTEM:
2063 case SIAtomicScope::AGENT:
2070 LGKMCnt |= IsCrossAddrSpaceOrdering;
2072 case SIAtomicScope::WORKGROUP:
2073 case SIAtomicScope::WAVEFRONT:
2074 case SIAtomicScope::SINGLETHREAD:
2083 if (VMCnt || LGKMCnt) {
2084 unsigned WaitCntImmediate =
2090 .
addImm(WaitCntImmediate);
2098 Scope == SIAtomicScope::WORKGROUP &&
2099 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
2111 if (Pos == Position::AFTER)
2118 SIAtomicScope Scope,
2119 SIAtomicAddrSpace AddrSpace,
2120 Position Pos)
const {
2121 if (!InsertCacheInv)
2126 MachineBasicBlock &
MBB = *
MI->getParent();
2129 if (Pos == Position::AFTER)
2132 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2134 case SIAtomicScope::SYSTEM:
2135 case SIAtomicScope::AGENT:
2143 case SIAtomicScope::WORKGROUP:
2148 if (!
ST.isCuModeEnabled()) {
2153 case SIAtomicScope::WAVEFRONT:
2154 case SIAtomicScope::SINGLETHREAD:
2169 if (Pos == Position::AFTER)
2175bool SIGfx10CacheControl::insertBarrierStart(
2182 if (!
ST.isCuModeEnabled() ||
ST.hasGFX1250Insts())
2186 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
2191bool SIGfx11CacheControl::enableLoadCacheBypass(
2193 SIAtomicAddrSpace AddrSpace)
const {
2197 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2199 case SIAtomicScope::SYSTEM:
2200 case SIAtomicScope::AGENT:
2205 case SIAtomicScope::WORKGROUP:
2210 if (!
ST.isCuModeEnabled())
2213 case SIAtomicScope::WAVEFRONT:
2214 case SIAtomicScope::SINGLETHREAD:
2232bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
2234 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2245 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2253 if (
Op == SIMemOp::LOAD)
2264 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2265 Position::AFTER, AtomicOrdering::Unordered);
2269 if (IsNonTemporal) {
2274 if (
Op == SIMemOp::STORE)
2288 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
2303 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
2316bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
2320 MachineBasicBlock &
MBB = *
MI->getParent();
2324 if (
ST.hasImageInsts()) {
2335 SIAtomicScope Scope,
2336 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
2337 bool IsCrossAddrSpaceOrdering,
2341 MachineBasicBlock &
MBB = *
MI->getParent();
2344 bool LOADCnt =
false;
2346 bool STORECnt =
false;
2348 if (Pos == Position::AFTER)
2351 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
2352 SIAtomicAddrSpace::NONE) {
2354 case SIAtomicScope::SYSTEM:
2355 case SIAtomicScope::AGENT:
2356 case SIAtomicScope::CLUSTER:
2357 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2359 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2362 case SIAtomicScope::WORKGROUP:
2372 if (!
ST.isCuModeEnabled() ||
ST.hasGFX1250Insts()) {
2373 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2375 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2379 case SIAtomicScope::WAVEFRONT:
2380 case SIAtomicScope::SINGLETHREAD:
2389 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
2391 case SIAtomicScope::SYSTEM:
2392 case SIAtomicScope::AGENT:
2393 case SIAtomicScope::CLUSTER:
2394 case SIAtomicScope::WORKGROUP:
2401 DSCnt |= IsCrossAddrSpaceOrdering;
2403 case SIAtomicScope::WAVEFRONT:
2404 case SIAtomicScope::SINGLETHREAD:
2425 if (Order != AtomicOrdering::Acquire &&
ST.hasImageInsts()) {
2443 if (Pos == Position::AFTER)
2450 SIAtomicScope Scope,
2451 SIAtomicAddrSpace AddrSpace,
2452 Position Pos)
const {
2453 if (!InsertCacheInv)
2456 MachineBasicBlock &
MBB = *
MI->getParent();
2465 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
2470 case SIAtomicScope::SYSTEM:
2473 case SIAtomicScope::AGENT:
2476 case SIAtomicScope::CLUSTER:
2479 case SIAtomicScope::WORKGROUP:
2488 if (
ST.isCuModeEnabled())
2493 case SIAtomicScope::WAVEFRONT:
2494 case SIAtomicScope::SINGLETHREAD:
2501 if (Pos == Position::AFTER)
2506 if (Pos == Position::AFTER)
2513 SIAtomicScope Scope,
2514 SIAtomicAddrSpace AddrSpace,
2515 bool IsCrossAddrSpaceOrdering,
2516 Position Pos)
const {
2517 MachineBasicBlock &
MBB = *
MI->getParent();
2526 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
2529 if (Pos == Position::AFTER)
2538 case SIAtomicScope::SYSTEM:
2542 case SIAtomicScope::AGENT:
2544 if (
ST.hasGFX1250Insts()) {
2549 case SIAtomicScope::CLUSTER:
2550 case SIAtomicScope::WORKGROUP:
2553 case SIAtomicScope::WAVEFRONT:
2554 case SIAtomicScope::SINGLETHREAD:
2561 if (Pos == Position::AFTER)
2567 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2568 IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
2573bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
2575 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2584 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2591 }
else if (IsNonTemporal) {
2604 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2605 Position::AFTER, AtomicOrdering::Unordered);
2611bool SIGfx12CacheControl::finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
2612 assert(
MI.mayStore() &&
"Not a Store inst");
2613 const bool IsRMW = (
MI.mayLoad() &&
MI.mayStore());
2618 if (Atomic &&
ST.requiresWaitXCntBeforeAtomicStores() &&
TII->isFLAT(
MI)) {
2619 MachineBasicBlock &
MBB = *
MI.getParent();
2628 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2635 Changed |= insertWaitsBeforeSystemScopeStore(
MI.getIterator());
2640bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &
MI)
const {
2641 if (!
ST.hasGFX1250Insts())
2645 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2646 assert(CPol &&
"No CPol operand?");
2654 SIAtomicScope Scope,
2655 SIAtomicAddrSpace AddrSpace)
const {
2658 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2660 case SIAtomicScope::SYSTEM:
2663 case SIAtomicScope::AGENT:
2666 case SIAtomicScope::CLUSTER:
2669 case SIAtomicScope::WORKGROUP:
2672 if (!
ST.isCuModeEnabled())
2675 case SIAtomicScope::WAVEFRONT:
2676 case SIAtomicScope::SINGLETHREAD:
2694bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
2695 if (AtomicPseudoMIs.empty())
2698 for (
auto &
MI : AtomicPseudoMIs)
2699 MI->eraseFromParent();
2701 AtomicPseudoMIs.clear();
2705bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2711 if (MOI.isAtomic()) {
2713 if (Order == AtomicOrdering::Monotonic ||
2714 Order == AtomicOrdering::Acquire ||
2715 Order == AtomicOrdering::SequentiallyConsistent) {
2716 Changed |= CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2717 MOI.getOrderingAddrSpace());
2722 if (MOI.isCooperative())
2723 Changed |= CC->handleCooperativeAtomic(*
MI);
2725 if (Order == AtomicOrdering::SequentiallyConsistent)
2726 Changed |= CC->insertWait(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
2727 SIMemOp::LOAD | SIMemOp::STORE,
2728 MOI.getIsCrossAddressSpaceOrdering(),
2729 Position::BEFORE, Order);
2731 if (Order == AtomicOrdering::Acquire ||
2732 Order == AtomicOrdering::SequentiallyConsistent) {
2734 MI, MOI.getScope(), MOI.getInstrAddrSpace(), SIMemOp::LOAD,
2735 MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
2736 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2737 MOI.getOrderingAddrSpace(),
2747 Changed |= CC->enableVolatileAndOrNonTemporal(
2748 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
2749 MOI.isNonTemporal(), MOI.isLastUse());
2754bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2760 MachineInstr &StoreMI = *
MI;
2762 if (MOI.isAtomic()) {
2763 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2764 MOI.getOrdering() == AtomicOrdering::Release ||
2765 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2766 Changed |= CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2767 MOI.getOrderingAddrSpace());
2772 if (MOI.isCooperative())
2773 Changed |= CC->handleCooperativeAtomic(*
MI);
2775 if (MOI.getOrdering() == AtomicOrdering::Release ||
2776 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2777 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2778 MOI.getOrderingAddrSpace(),
2779 MOI.getIsCrossAddressSpaceOrdering(),
2782 Changed |= CC->finalizeStore(StoreMI,
true);
2789 Changed |= CC->enableVolatileAndOrNonTemporal(
2790 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2791 MOI.isNonTemporal());
2795 Changed |= CC->finalizeStore(StoreMI,
false);
2799bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
2801 assert(
MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
2803 AtomicPseudoMIs.push_back(
MI);
2806 const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();
2808 if (MOI.isAtomic()) {
2810 if (Order == AtomicOrdering::Acquire) {
2812 MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2813 MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE, Order);
2816 if (Order == AtomicOrdering::Release ||
2817 Order == AtomicOrdering::AcquireRelease ||
2818 Order == AtomicOrdering::SequentiallyConsistent)
2826 Changed |= CC->insertRelease(
MI, MOI.getScope(), OrderingAddrSpace,
2827 MOI.getIsCrossAddressSpaceOrdering(),
2835 if (Order == AtomicOrdering::Acquire ||
2836 Order == AtomicOrdering::AcquireRelease ||
2837 Order == AtomicOrdering::SequentiallyConsistent)
2838 Changed |= CC->insertAcquire(
MI, MOI.getScope(), OrderingAddrSpace,
2847bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
2852 MachineInstr &RMWMI = *
MI;
2854 if (MOI.isAtomic()) {
2856 if (Order == AtomicOrdering::Monotonic ||
2857 Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
2858 Order == AtomicOrdering::AcquireRelease ||
2859 Order == AtomicOrdering::SequentiallyConsistent) {
2860 Changed |= CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2861 MOI.getInstrAddrSpace());
2864 if (Order == AtomicOrdering::Release ||
2865 Order == AtomicOrdering::AcquireRelease ||
2866 Order == AtomicOrdering::SequentiallyConsistent ||
2867 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2868 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2869 MOI.getOrderingAddrSpace(),
2870 MOI.getIsCrossAddressSpaceOrdering(),
2873 if (Order == AtomicOrdering::Acquire ||
2874 Order == AtomicOrdering::AcquireRelease ||
2875 Order == AtomicOrdering::SequentiallyConsistent ||
2876 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2877 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2879 MI, MOI.getScope(), MOI.getInstrAddrSpace(),
2880 isAtomicRet(*
MI) ? SIMemOp::LOAD : SIMemOp::STORE,
2881 MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
2882 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2883 MOI.getOrderingAddrSpace(),
2887 Changed |= CC->finalizeStore(RMWMI,
true);
2894bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
2895 const MachineModuleInfo &MMI =
2896 getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
2897 return SIMemoryLegalizer(MMI).run(MF);
2904 .getCachedResult<MachineModuleAnalysis>(
2906 assert(MMI &&
"MachineModuleAnalysis must be available");
2907 if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
2917 CC = SICacheControl::create(ST);
2919 for (
auto &
MBB : MF) {
2923 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2926 I != E &&
I->isBundledWithPred(); ++
I) {
2927 I->unbundleFromPred();
2930 MO.setIsInternalRead(
false);
2933 MI->eraseFromParent();
2934 MI =
II->getIterator();
2937 if (
ST.getInstrInfo()->isBarrierStart(
MI->getOpcode())) {
2945 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2947 else if (
const auto &MOI = MOA.getStoreInfo(
MI)) {
2949 }
else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2951 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2952 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2956 Changed |= removeAtomicPseudoMIs();
2962char SIMemoryLegalizerLegacy::
ID = 0;
2966 return new SIMemoryLegalizerLegacy();
static std::optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
AMDGPU Machine Module Info.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This header defines various interfaces for pass management in LLVM.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< bool > AmdgcnSkipCacheInvalidations("amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden, cl::desc("Use this to skip inserting cache invalidating instructions."))
static const uint32_t IV[8]
SyncScope::ID getWorkgroupSSID() const
SyncScope::ID getWavefrontSSID() const
SyncScope::ID getAgentSSID() const
SyncScope::ID getClusterOneAddressSpaceSSID() const
SyncScope::ID getClusterSSID() const
std::optional< bool > isSyncScopeInclusion(SyncScope::ID A, SyncScope::ID B) const
In AMDGPU target synchronization scopes are inclusive, meaning a larger synchronization scope is incl...
SyncScope::ID getAgentOneAddressSpaceSSID() const
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const
SyncScope::ID getWavefrontOneAddressSpaceSSID() const
SyncScope::ID getSystemOneAddressSpaceSSID() const
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Diagnostic information for unsupported feature in backend.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
A helper class to return the specified delimiter string after the first invocation of operator String...
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isAtomicRet(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
StringRef - Represent a constant reference to a string, i.e.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Undef
Value of the register doesn't matter.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Scope
Defines the scope in which this symbol should be visible: Default -- Visible in the public interface o...
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIMemoryLegalizerID
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool isReleaseOrStronger(AtomicOrdering AO)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, AtomicOrdering Other)
Return a single atomic ordering that is at least as strong as both the AO and Other orderings for an ...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
DWARFExpression::Operation Op
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
FunctionPass * createSIMemoryLegalizerPass()