72#define DEBUG_TYPE "arm-ldst-opt"
74STATISTIC(NumLDMGened ,
"Number of ldm instructions generated");
75STATISTIC(NumSTMGened ,
"Number of stm instructions generated");
76STATISTIC(NumVLDMGened,
"Number of vldm instructions generated");
77STATISTIC(NumVSTMGened,
"Number of vstm instructions generated");
78STATISTIC(NumLdStMoved,
"Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,
"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,
"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM,
"Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM,
"Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR,
"Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR,
"Number of strd instructions turned back into str's");
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
114 bool RegClassInfoValid;
115 bool isThumb1, isThumb2;
130 struct MemOpQueueEntry {
142 struct MergeCandidate {
147 unsigned LatestMIIdx;
150 unsigned EarliestMIIdx;
157 bool CanMergeToLSMulti;
160 bool CanMergeToLSDouble;
171 unsigned Base,
unsigned WordOffset,
175 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
177 ArrayRef<std::pair<unsigned, bool>> Regs,
181 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
183 ArrayRef<std::pair<unsigned, bool>> Regs,
185 void FormCandidates(
const MemOpQueue &MemOps);
186 MachineInstr *MergeOpsUpdate(
const MergeCandidate &Cand);
199char ARMLoadStoreOpt::ID = 0;
205 for (
const auto &MO :
MI.operands()) {
208 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
218 unsigned Opcode =
MI.getOpcode();
219 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
220 unsigned NumOperands =
MI.getDesc().getNumOperands();
221 unsigned OffField =
MI.getOperand(NumOperands - 3).getImm();
223 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
224 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
225 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
226 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
230 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
231 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
246 return MI.getOperand(1);
250 return MI.getOperand(0);
347 case ARM::tLDMIA_UPD:
348 case ARM::tSTMIA_UPD:
349 case ARM::t2LDMIA_RET:
351 case ARM::t2LDMIA_UPD:
353 case ARM::t2STMIA_UPD:
355 case ARM::VLDMSIA_UPD:
357 case ARM::VSTMSIA_UPD:
359 case ARM::VLDMDIA_UPD:
361 case ARM::VSTMDIA_UPD:
375 case ARM::t2LDMDB_UPD:
377 case ARM::t2STMDB_UPD:
378 case ARM::VLDMSDB_UPD:
379 case ARM::VSTMSDB_UPD:
380 case ARM::VLDMDDB_UPD:
381 case ARM::VSTMDDB_UPD:
393 return Opc == ARM::tLDRi ||
Opc == ARM::tLDRspi;
397 return Opc == ARM::t2LDRi12 ||
Opc == ARM::t2LDRi8;
405 return Opc == ARM::tSTRi ||
Opc == ARM::tSTRspi;
409 return Opc == ARM::t2STRi12 ||
Opc == ARM::t2STRi8;
438 switch (
MI->getOpcode()) {
465 case ARM::tLDMIA_UPD:
466 case ARM::tSTMIA_UPD:
473 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 4;
476 return (
MI->getNumOperands() -
MI->getDesc().getNumOperands() + 1) * 8;
488 assert(isThumb1 &&
"Can only update base register uses for Thumb1!");
492 bool InsertSub =
false;
493 unsigned Opc =
MBBI->getOpcode();
495 if (
MBBI->readsRegister(
Base,
nullptr)) {
498 Opc == ARM::tLDRi ||
Opc == ARM::tLDRHi ||
Opc == ARM::tLDRBi;
500 Opc == ARM::tSTRi ||
Opc == ARM::tSTRHi ||
Opc == ARM::tSTRBi;
502 if (IsLoad || IsStore) {
508 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
515 if (
Offset >= 0 && !(IsStore && InstrSrcReg ==
Base))
519 }
else if ((
Opc == ARM::tSUBi8 ||
Opc == ARM::tADDi8) &&
520 !definesCPSR(*
MBBI)) {
525 MBBI->getOperand(
MBBI->getDesc().getNumOperands() - 3);
527 MO.
getImm() + WordOffset * 4 :
528 MO.
getImm() - WordOffset * 4 ;
542 }
else if (definesCPSR(*
MBBI) ||
MBBI->isCall() ||
MBBI->isBranch()) {
560 if (
MBBI->killsRegister(
Base,
nullptr) ||
561 MBBI->definesRegister(
Base,
nullptr))
583unsigned ARMLoadStoreOpt::findFreeReg(
const TargetRegisterClass &RegClass) {
584 if (!RegClassInfoValid) {
586 RegClassInfoValid =
true;
589 for (
unsigned Reg : RegClassInfo.
getOrder(&RegClass))
598void ARMLoadStoreOpt::moveLiveRegsBefore(
const MachineBasicBlock &
MBB,
601 if (!LiveRegsValid) {
605 LiveRegsValid =
true;
608 while (LiveRegPos != Before) {
616 for (
const std::pair<unsigned, bool> &R : Regs)
625MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
627 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
629 ArrayRef<std::pair<unsigned, bool>> Regs,
631 unsigned NumRegs = Regs.size();
636 bool SafeToClobberCPSR = !isThumb1 ||
640 bool Writeback = isThumb1;
646 assert(
Base != ARM::SP &&
"Thumb1 does not allow SP in register list");
647 if (Opcode == ARM::tLDRi)
649 else if (Opcode == ARM::tSTRi)
656 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
658 if (
Offset == 4 && haveIBAndDA) {
660 }
else if (
Offset == -4 * (
int)NumRegs + 4 && haveIBAndDA) {
662 }
else if (
Offset == -4 * (
int)NumRegs && isNotVFP && !isThumb1) {
665 }
else if (
Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
678 if (!SafeToClobberCPSR)
685 NewBase = Regs[NumRegs-1].first;
689 moveLiveRegsBefore(
MBB, InsertBefore);
693 for (
const std::pair<unsigned, bool> &R : Regs)
696 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
701 int BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2ADDspImm
705 : (isThumb1 &&
Offset < 8)
707 : isThumb1 ?
ARM::tADDi8 :
ARM::ADDri;
713 BaseOpc = isThumb2 ? (BaseKill &&
Base == ARM::SP ? ARM::t2SUBspImm
717 : isThumb1 ?
ARM::tSUBi8 :
ARM::SUBri;
726 bool KillOldBase = BaseKill &&
735 if (
Base != NewBase &&
736 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
754 if (BaseOpc == ARM::tADDrSPi) {
755 assert(
Offset % 4 == 0 &&
"tADDrSPi offset is scaled by 4");
793 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
796 MachineInstrBuilder MIB;
799 assert(isThumb1 &&
"expected Writeback only inThumb1");
800 if (Opcode == ARM::tLDMIA) {
803 Opcode = ARM::tLDMIA_UPD;
815 UpdateBaseRegUses(
MBB, InsertBefore,
DL,
Base, NumRegs, Pred, PredReg);
824 for (
const std::pair<unsigned, bool> &R : Regs)
832MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
834 int Offset,
unsigned Base,
bool BaseKill,
unsigned Opcode,
836 ArrayRef<std::pair<unsigned, bool>> Regs,
839 assert((IsLoad ||
isi32Store(Opcode)) &&
"Must have integer load or store");
840 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
843 MachineInstrBuilder MIB =
BuildMI(
MBB, InsertBefore,
DL,
844 TII->get(LoadStoreOpcode));
858MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(
const MergeCandidate &Cand) {
859 const MachineInstr *
First = Cand.Instrs.front();
860 unsigned Opcode =
First->getOpcode();
863 SmallVector<unsigned, 4> ImpDefs;
864 DenseSet<unsigned> KilledRegs;
865 DenseSet<unsigned> UsedRegs;
867 for (
const MachineInstr *
MI : Cand.Instrs) {
870 bool IsKill = MO.
isKill();
880 for (
const MachineOperand &MO :
MI->implicit_operands()) {
889 if (
MI->readsRegister(DefReg,
nullptr))
899 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
908 MachineInstr *Merged =
nullptr;
909 if (Cand.CanMergeToLSDouble)
910 Merged = CreateLoadStoreDouble(
MBB, InsertBefore,
Offset,
Base, BaseKill,
911 Opcode, Pred, PredReg,
DL, Regs,
913 if (!Merged && Cand.CanMergeToLSMulti)
914 Merged = CreateLoadStoreMulti(
MBB, InsertBefore,
Offset,
Base, BaseKill,
915 Opcode, Pred, PredReg,
DL, Regs, Cand.Instrs);
921 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
922 bool EarliestAtBegin =
false;
924 EarliestAtBegin =
true;
926 EarliestI = std::prev(EarliestI);
930 for (MachineInstr *
MI : Cand.Instrs)
937 EarliestI = std::next(EarliestI);
943 for (MachineInstr &
MI : FixupRange) {
944 for (
unsigned &ImpDefReg : ImpDefs) {
945 for (MachineOperand &MO :
MI.implicit_operands()) {
957 for (
unsigned ImpDef : ImpDefs)
962 for (MachineInstr &
MI : FixupRange) {
963 for (MachineOperand &MO :
MI.uses()) {
989 unsigned Opcode =
MI.getOpcode();
1002void ARMLoadStoreOpt::FormCandidates(
const MemOpQueue &MemOps) {
1003 const MachineInstr *FirstMI = MemOps[0].MI;
1008 unsigned SIndex = 0;
1009 unsigned EIndex = MemOps.size();
1012 const MachineInstr *
MI = MemOps[SIndex].MI;
1013 int Offset = MemOps[SIndex].Offset;
1016 unsigned PRegNum = PMO.
isUndef() ? std::numeric_limits<unsigned>::max()
1017 :
TRI->getEncodingValue(PReg);
1018 unsigned Latest = SIndex;
1019 unsigned Earliest = SIndex;
1021 bool CanMergeToLSDouble =
1027 CanMergeToLSDouble =
false;
1029 bool CanMergeToLSMulti =
true;
1032 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1033 CanMergeToLSMulti =
false;
1037 if (PReg == ARM::SP || PReg == ARM::PC)
1038 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1042 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1057 for (
unsigned I = SIndex+1;
I < EIndex; ++
I, ++
Count) {
1058 int NewOffset = MemOps[
I].Offset;
1063 if (
Reg == ARM::SP ||
Reg == ARM::PC)
1069 unsigned RegNum = MO.
isUndef() ? std::numeric_limits<unsigned>::max()
1070 :
TRI->getEncodingValue(
Reg);
1071 bool PartOfLSMulti = CanMergeToLSMulti;
1072 if (PartOfLSMulti) {
1074 if (RegNum <= PRegNum)
1075 PartOfLSMulti =
false;
1079 else if (!isNotVFP && RegNum != PRegNum+1)
1080 PartOfLSMulti =
false;
1083 bool PartOfLSDouble = CanMergeToLSDouble &&
Count <= 1;
1085 if (!PartOfLSMulti && !PartOfLSDouble)
1087 CanMergeToLSMulti &= PartOfLSMulti;
1088 CanMergeToLSDouble &= PartOfLSDouble;
1091 unsigned Position = MemOps[
I].Position;
1092 if (Position < MemOps[Latest].Position)
1094 else if (Position > MemOps[Earliest].Position)
1102 MergeCandidate *Candidate =
new(
Allocator.Allocate()) MergeCandidate;
1103 for (
unsigned C = SIndex, CE = SIndex +
Count;
C <
CE; ++
C)
1104 Candidate->Instrs.push_back(MemOps[
C].
MI);
1105 Candidate->LatestMIIdx = Latest - SIndex;
1106 Candidate->EarliestMIIdx = Earliest - SIndex;
1107 Candidate->InsertPos = MemOps[Latest].Position;
1109 CanMergeToLSMulti = CanMergeToLSDouble =
false;
1110 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1111 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1112 Candidates.push_back(Candidate);
1115 }
while (SIndex < EIndex);
1192 switch (
MI.getOpcode()) {
1193 case ARM::tADDi8: Scale = 4; CheckCPSRDef =
true;
break;
1194 case ARM::tSUBi8: Scale = -4; CheckCPSRDef =
true;
break;
1196 case ARM::t2SUBspImm:
1197 case ARM::SUBri: Scale = -1; CheckCPSRDef =
true;
break;
1199 case ARM::t2ADDspImm:
1200 case ARM::ADDri: Scale = 1; CheckCPSRDef =
true;
break;
1201 case ARM::tADDspi: Scale = 4; CheckCPSRDef =
false;
break;
1202 case ARM::tSUBspi: Scale = -4; CheckCPSRDef =
false;
break;
1207 if (
MI.getOperand(0).getReg() !=
Reg ||
1208 MI.getOperand(1).getReg() !=
Reg ||
1210 MIPredReg != PredReg)
1213 if (CheckCPSRDef && definesCPSR(
MI))
1215 return MI.getOperand(2).getImm() * Scale;
1226 if (
MBBI == BeginMBBI)
1231 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1235 return Offset == 0 ? EndMBBI : PrevMBBI;
1247 while (NextMBBI != EndMBBI) {
1249 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1251 if (NextMBBI == EndMBBI)
1265 if (
Reg == ARM::SP || NextMBBI->readsRegister(
Reg,
TRI) ||
1266 NextMBBI->definesRegister(
Reg,
TRI))
1286bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *
MI) {
1288 if (isThumb1)
return false;
1291 const MachineOperand &BaseOP =
MI->getOperand(0);
1293 bool BaseKill = BaseOP.
isKill();
1296 unsigned Opcode =
MI->getOpcode();
1306 MachineBasicBlock &
MBB = *
MI->getParent();
1328 bool HighRegsUsed =
false;
1330 if (MO.
getReg() >= ARM::R8) {
1331 HighRegsUsed =
true;
1341 if (MergeInstr !=
MBB.
end()) {
1368 return ARM::LDR_PRE_IMM;
1370 return ARM::STR_PRE_IMM;
1381 return ARM::t2LDR_PRE;
1384 return ARM::t2STR_PRE;
1393 return ARM::LDR_POST_IMM;
1395 return ARM::STR_POST_IMM;
1406 return ARM::t2LDR_POST;
1408 case ARM::t2LDRBi12:
1409 return ARM::t2LDRB_POST;
1410 case ARM::t2LDRSBi8:
1411 case ARM::t2LDRSBi12:
1412 return ARM::t2LDRSB_POST;
1414 case ARM::t2LDRHi12:
1415 return ARM::t2LDRH_POST;
1416 case ARM::t2LDRSHi8:
1417 case ARM::t2LDRSHi12:
1418 return ARM::t2LDRSH_POST;
1421 return ARM::t2STR_POST;
1423 case ARM::t2STRBi12:
1424 return ARM::t2STRB_POST;
1426 case ARM::t2STRHi12:
1427 return ARM::t2STRH_POST;
1429 case ARM::MVE_VLDRBS16:
1430 return ARM::MVE_VLDRBS16_post;
1431 case ARM::MVE_VLDRBS32:
1432 return ARM::MVE_VLDRBS32_post;
1433 case ARM::MVE_VLDRBU16:
1434 return ARM::MVE_VLDRBU16_post;
1435 case ARM::MVE_VLDRBU32:
1436 return ARM::MVE_VLDRBU32_post;
1437 case ARM::MVE_VLDRHS32:
1438 return ARM::MVE_VLDRHS32_post;
1439 case ARM::MVE_VLDRHU32:
1440 return ARM::MVE_VLDRHU32_post;
1441 case ARM::MVE_VLDRBU8:
1442 return ARM::MVE_VLDRBU8_post;
1443 case ARM::MVE_VLDRHU16:
1444 return ARM::MVE_VLDRHU16_post;
1445 case ARM::MVE_VLDRWU32:
1446 return ARM::MVE_VLDRWU32_post;
1447 case ARM::MVE_VSTRB16:
1448 return ARM::MVE_VSTRB16_post;
1449 case ARM::MVE_VSTRB32:
1450 return ARM::MVE_VSTRB32_post;
1451 case ARM::MVE_VSTRH32:
1452 return ARM::MVE_VSTRH32_post;
1453 case ARM::MVE_VSTRBU8:
1454 return ARM::MVE_VSTRBU8_post;
1455 case ARM::MVE_VSTRHU16:
1456 return ARM::MVE_VSTRHU16_post;
1457 case ARM::MVE_VSTRWU32:
1458 return ARM::MVE_VSTRWU32_post;
1466bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *
MI) {
1469 if (isThumb1)
return false;
1474 unsigned Opcode =
MI->getOpcode();
1476 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1477 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1478 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1480 if (
MI->getOperand(2).getImm() != 0)
1487 if (
MI->getOperand(0).getReg() ==
Base)
1493 MachineBasicBlock &
MBB = *
MI->getParent();
1499 if (!isAM5 &&
Offset == Bytes) {
1501 }
else if (
Offset == -Bytes) {
1505 if (MergeInstr ==
MBB.
end())
1509 if ((isAM5 &&
Offset != Bytes) ||
1527 MachineOperand &MO =
MI->getOperand(0);
1541 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1578 MachineOperand &MO =
MI->getOperand(0);
1582 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1611bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &
MI)
const {
1612 unsigned Opcode =
MI.getOpcode();
1613 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1614 "Must have t2STRDi8 or t2LDRDi8");
1615 if (
MI.getOperand(3).getImm() != 0)
1621 const MachineOperand &BaseOp =
MI.getOperand(2);
1623 const MachineOperand &Reg0Op =
MI.getOperand(0);
1624 const MachineOperand &Reg1Op =
MI.getOperand(1);
1631 MachineBasicBlock &
MBB = *
MI.getParent();
1637 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1640 if (MergeInstr ==
MBB.
end())
1642 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1651 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1654 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1659 assert(
TII->get(Opcode).getNumOperands() == 6 &&
1660 TII->get(NewOpc).getNumOperands() == 7 &&
1661 "Unexpected number of operands in Opcode specification.");
1664 for (
const MachineOperand &MO :
MI.implicit_operands())
1676 unsigned Opcode =
MI.getOpcode();
1696 if (!
MI.getOperand(1).isReg())
1701 if (!
MI.hasOneMemOperand())
1720 if (
MI.getOperand(0).isReg() &&
MI.getOperand(0).isUndef())
1724 if (
MI.getOperand(1).isUndef())
1732 bool isDef,
unsigned NewOpc,
unsigned Reg,
1733 bool RegDeadKill,
bool RegUndef,
unsigned BaseReg,
1758bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &
MBB,
1760 MachineInstr *
MI = &*
MBBI;
1761 unsigned Opcode =
MI->getOpcode();
1764 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1767 const MachineOperand &BaseOp =
MI->getOperand(2);
1769 Register EvenReg =
MI->getOperand(0).getReg();
1770 Register OddReg =
MI->getOperand(1).getReg();
1771 unsigned EvenRegNum =
TRI->getDwarfRegNum(EvenReg,
false);
1772 unsigned OddRegNum =
TRI->getDwarfRegNum(OddReg,
false);
1776 bool Errata602117 = EvenReg ==
BaseReg &&
1777 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->
isCortexM3();
1779 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1780 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1782 if (!Errata602117 && !NonConsecutiveRegs)
1785 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1786 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1787 bool EvenDeadKill = isLd ?
1788 MI->getOperand(0).isDead() :
MI->getOperand(0).isKill();
1789 bool EvenUndef =
MI->getOperand(0).isUndef();
1790 bool OddDeadKill = isLd ?
1791 MI->getOperand(1).isDead() :
MI->getOperand(1).isKill();
1792 bool OddUndef =
MI->getOperand(1).isUndef();
1793 bool BaseKill = BaseOp.
isKill();
1794 bool BaseUndef = BaseOp.
isUndef();
1795 assert((isT2 ||
MI->getOperand(3).getReg() == ARM::NoRegister) &&
1796 "register offset not handled below");
1801 if (OddRegNum > EvenRegNum && OffImm == 0) {
1804 unsigned NewOpc = (isLd)
1805 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1806 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1828 unsigned NewOpc = (isLd)
1829 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1830 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1833 unsigned NewOpc2 = (isLd)
1834 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1835 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1838 if (isLd &&
TRI->regsOverlap(EvenReg, BaseReg)) {
1839 assert(!
TRI->regsOverlap(OddReg, BaseReg));
1841 false, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
MI);
1843 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1846 if (OddReg == EvenReg && EvenDeadKill) {
1850 EvenDeadKill =
false;
1854 if (EvenReg == BaseReg)
1855 EvenDeadKill =
false;
1857 EvenUndef, BaseReg,
false, BaseUndef, Pred, PredReg,
TII,
1860 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg,
TII,
1875bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &
MBB) {
1877 unsigned CurrBase = 0;
1878 unsigned CurrOpc = ~0
u;
1880 unsigned Position = 0;
1881 assert(Candidates.size() == 0);
1883 LiveRegsValid =
false;
1888 MBBI = std::prev(
I);
1889 if (FixInvalidRegPairOp(
MBB,
MBBI))
1894 unsigned Opcode =
MBBI->getOpcode();
1895 const MachineOperand &MO =
MBBI->getOperand(0);
1901 if (CurrBase == 0) {
1906 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1910 if (CurrOpc == Opcode && CurrBase ==
Base && CurrPred == Pred) {
1918 bool Overlap =
false;
1922 for (
const MemOpQueueEntry &
E : MemOps) {
1923 if (
TRI->regsOverlap(
Reg,
E.MI->getOperand(0).getReg())) {
1933 if (
Offset > MemOps.back().Offset) {
1934 MemOps.push_back(MemOpQueueEntry(*
MBBI,
Offset, Position));
1937 MemOpQueue::iterator
MI, ME;
1938 for (
MI = MemOps.begin(), ME = MemOps.end();
MI != ME; ++
MI) {
1949 if (
MI != MemOps.end()) {
1950 MemOps.insert(
MI, MemOpQueueEntry(*
MBBI,
Offset, Position));
1961 }
else if (
MBBI->isDebugInstr()) {
1963 }
else if (
MBBI->getOpcode() == ARM::t2LDRDi8 ||
1964 MBBI->getOpcode() == ARM::t2STRDi8) {
1971 if (MemOps.size() > 0) {
1972 FormCandidates(MemOps);
1980 if (MemOps.size() > 0)
1981 FormCandidates(MemOps);
1985 auto LessThan = [](
const MergeCandidate*
M0,
const MergeCandidate *
M1) {
1986 return M0->InsertPos <
M1->InsertPos;
1992 for (
const MergeCandidate *Candidate : Candidates) {
1993 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1994 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1999 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2000 MergeBaseUpdateLSDouble(*Merged);
2002 MergeBaseUpdateLSMultiple(Merged);
2004 for (MachineInstr *
MI : Candidate->Instrs) {
2005 if (MergeBaseUpdateLoadStore(
MI))
2010 assert(Candidate->Instrs.size() == 1);
2011 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2017 for (MachineInstr *
MI : MergeBaseCandidates)
2018 MergeBaseUpdateLSDouble(*
MI);
2019 MergeBaseCandidates.clear();
2034bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &
MBB) {
2036 if (isThumb1)
return false;
2041 (
MBBI->getOpcode() == ARM::BX_RET ||
2042 MBBI->getOpcode() == ARM::tBX_RET ||
2043 MBBI->getOpcode() == ARM::MOVPCLR)) {
2046 while (PrevI->isDebugInstr() && PrevI !=
MBB.
begin())
2048 MachineInstr &PrevMI = *PrevI;
2050 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2051 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2052 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2054 if (MO.
getReg() != ARM::LR)
2056 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2057 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2058 Opcode == ARM::LDMIA_UPD) &&
"Unsupported multiple load-return!");
2069bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &
MBB) {
2072 MBBI->getOpcode() != ARM::tBX_RET)
2077 if (Prev->getOpcode() != ARM::tMOVr ||
2078 !Prev->definesRegister(ARM::LR,
nullptr))
2081 for (
auto Use : Prev->uses())
2083 assert(STI->hasV4TOps());
2096bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2103 AFI = Fn.
getInfo<ARMFunctionInfo>();
2107 RegClassInfoValid =
false;
2111 bool Modified =
false, ModifiedLDMReturn =
false;
2112 for (MachineBasicBlock &
MBB : Fn) {
2115 ModifiedLDMReturn |= MergeReturnIntoLDM(
MBB);
2125 if (ModifiedLDMReturn)
2132#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2133 "ARM pre- register allocation load / store optimization pass"
2155 StringRef getPassName()
const override {
2176 bool DistributeIncrements();
2182char ARMPreAllocLoadStoreOpt::ID = 0;
2193 cl::init(8),
cl::Hidden);
2195bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(
MachineFunction &Fn) {
2199 TD = &Fn.getDataLayout();
2201 TII = STI->getInstrInfo();
2202 TRI = STI->getRegisterInfo();
2203 MRI = &Fn.getRegInfo();
2204 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2206 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2208 bool Modified = DistributeIncrements();
2210 Modified |= RescheduleLoadStoreInstrs(&MFI);
2225 if (
I->isDebugInstr() || MemOps.
count(&*
I))
2227 if (
I->isCall() ||
I->isTerminator() ||
I->hasUnmodeledSideEffects())
2229 if (
I->mayStore() || (!isLd &&
I->mayLoad()))
2231 if (
I->mayAlias(
AA, *
MemOp,
false))
2233 for (
unsigned j = 0,
NumOps =
I->getNumOperands(); j !=
NumOps; ++j) {
2246 if (MemRegs.
size() <= 4)
2249 return AddedRegPressure.
size() <= MemRegs.
size() * 2;
2252bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2253 MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
unsigned &NewOpc,
2257 if (!STI->hasV5TEOps())
2263 if (Opcode == ARM::LDRi12) {
2265 }
else if (Opcode == ARM::STRi12) {
2267 }
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2268 NewOpc = ARM::t2LDRDi8;
2271 }
else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2272 NewOpc = ARM::t2STRDi8;
2289 if (Alignment < ReqAlign)
2295 int Limit = (1 << 8) * Scale;
2296 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2305 int Limit = (1 << 8) * Scale;
2306 if (OffImm >= Limit || (OffImm & (Scale-1)))
2312 if (FirstReg == SecondReg)
2320bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2321 MachineBasicBlock *
MBB, SmallVectorImpl<MachineInstr *> &
Ops,
unsigned Base,
2322 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2324 bool RetVal =
false;
2331 return LOffset > ROffset;
2338 while (
Ops.size() > 1) {
2339 unsigned FirstLoc = ~0
U;
2340 unsigned LastLoc = 0;
2341 MachineInstr *FirstOp =
nullptr;
2342 MachineInstr *LastOp =
nullptr;
2344 unsigned LastOpcode = 0;
2345 unsigned LastBytes = 0;
2346 unsigned NumMove = 0;
2351 if (LastOpcode && LSMOpcode != LastOpcode)
2358 if (Bytes != LastBytes ||
Offset != (LastOffset + (
int)Bytes))
2370 LastOpcode = LSMOpcode;
2372 unsigned Loc = MI2LocMap[
Op];
2373 if (Loc <= FirstLoc) {
2377 if (Loc >= LastLoc) {
2386 SmallPtrSet<MachineInstr*, 4> MemOps;
2387 SmallSet<unsigned, 4> MemRegs;
2388 for (
size_t i =
Ops.size() - NumMove, e =
Ops.size(); i != e; ++i) {
2395 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2398 MemOps, MemRegs,
TRI, AA);
2400 for (
unsigned i = 0; i != NumMove; ++i)
2405 while (InsertPos !=
MBB->
end() &&
2406 (MemOps.
count(&*InsertPos) || InsertPos->isDebugInstr()))
2411 MachineInstr *Op0 =
Ops.back();
2412 MachineInstr *Op1 =
Ops[
Ops.size()-2];
2417 unsigned NewOpc = 0;
2420 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2421 FirstReg, SecondReg, BaseReg,
2422 Offset, PredReg, Pred, isT2)) {
2426 const MCInstrDesc &MCID =
TII->get(NewOpc);
2427 const TargetRegisterClass *TRC =
TII->getRegClass(MCID, 0,
TRI);
2428 MRI->constrainRegClass(FirstReg, TRC);
2429 MRI->constrainRegClass(SecondReg, TRC);
2433 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2447 MachineInstrBuilder MIB =
BuildMI(*
MBB, InsertPos, dl, MCID)
2470 for (
unsigned i = 0; i != NumMove; ++i) {
2471 MachineInstr *
Op =
Ops.pop_back_val();
2482 NumLdStMoved += NumMove;
2493 if (
MI->isNonListDebugValue()) {
2494 auto &
Op =
MI->getOperand(0);
2498 for (
unsigned I = 2;
I <
MI->getNumOperands();
I++) {
2499 auto &
Op =
MI->getOperand(
I);
2513 auto RegIt = RegisterMap.find(
Op.getReg());
2514 if (RegIt == RegisterMap.end())
2516 auto &InstrVec = RegIt->getSecond();
2523 MI->getDebugLoc()->getInlinedAt());
2528ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *
MBB) {
2529 bool RetVal =
false;
2531 DenseMap<MachineInstr *, unsigned> MI2LocMap;
2532 using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2533 using BaseVec = SmallVector<unsigned, 4>;
2534 Base2InstMap Base2LdsMap;
2535 Base2InstMap Base2StsMap;
2541 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2548 MachineInstr &
MI = *
MBBI;
2549 if (
MI.isCall() ||
MI.isTerminator()) {
2555 if (!
MI.isDebugInstr())
2556 MI2LocMap[&
MI] = ++Loc;
2564 int Opc =
MI.getOpcode();
2568 bool StopHere =
false;
2569 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2572 BI->second.push_back(&
MI);
2573 Bases.push_back(
Base);
2576 for (
const MachineInstr *
MI : BI->second) {
2583 BI->second.push_back(&
MI);
2587 FindBases(Base2LdsMap, LdBases);
2589 FindBases(Base2StsMap, StBases);
2600 for (
unsigned Base : LdBases) {
2601 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[
Base];
2603 RetVal |= RescheduleOps(
MBB, Lds,
Base,
true, MI2LocMap, RegisterMap);
2607 for (
unsigned Base : StBases) {
2608 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[
Base];
2610 RetVal |= RescheduleOps(
MBB, Sts,
Base,
false, MI2LocMap, RegisterMap);
2614 Base2LdsMap.clear();
2615 Base2StsMap.clear();
2771 SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2774 SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2776 MachineInstr &
MI = *
MBBI;
2778 auto PopulateRegisterAndInstrMapForDebugInstr = [&](
Register Reg) {
2779 auto RegIt = RegisterMap.
find(
Reg);
2780 if (RegIt == RegisterMap.
end())
2782 auto &InstrVec = RegIt->getSecond();
2783 InstrVec.push_back(&
MI);
2784 InstrMap[&
MI].push_back(
Reg);
2787 if (
MI.isDebugValue()) {
2789 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2797 PopulateRegisterAndInstrMapForDebugInstr(
Op.getReg());
2805 auto InstrIt = DbgValueSinkCandidates.
find(DbgVar);
2806 if (InstrIt != DbgValueSinkCandidates.
end()) {
2807 auto *
Instr = InstrIt->getSecond();
2808 auto RegIt = InstrMap.
find(Instr);
2809 if (RegIt != InstrMap.
end()) {
2810 const auto &RegVec = RegIt->getSecond();
2813 for (
auto &
Reg : RegVec) {
2814 auto RegIt = RegisterMap.
find(
Reg);
2815 if (RegIt == RegisterMap.
end())
2817 auto &InstrVec = RegIt->getSecond();
2818 auto IsDbgVar = [&](MachineInstr *
I) ->
bool {
2820 return Var == DbgVar;
2826 [&](MachineOperand &
Op) {
Op.setReg(0); });
2829 DbgValueSinkCandidates[DbgVar] = &
MI;
2833 auto Opc =
MI.getOpcode();
2836 auto Reg =
MI.getOperand(0).getReg();
2837 auto RegIt = RegisterMap.
find(
Reg);
2838 if (RegIt == RegisterMap.
end())
2840 auto &DbgInstrVec = RegIt->getSecond();
2841 if (!DbgInstrVec.size())
2843 for (
auto *DbgInstr : DbgInstrVec) {
2845 auto *ClonedMI =
MI.getMF()->CloneMachineInstr(DbgInstr);
2854 DbgValueSinkCandidates.
erase(DbgVar);
2857 [&](MachineOperand &
Op) {
Op.setReg(0); });
2860 if (DbgInstr->isDebugValueList())
2874 switch (
MI.getOpcode()) {
2875 case ARM::MVE_VLDRBS16:
2876 case ARM::MVE_VLDRBS32:
2877 case ARM::MVE_VLDRBU16:
2878 case ARM::MVE_VLDRBU32:
2879 case ARM::MVE_VLDRHS32:
2880 case ARM::MVE_VLDRHU32:
2881 case ARM::MVE_VLDRBU8:
2882 case ARM::MVE_VLDRHU16:
2883 case ARM::MVE_VLDRWU32:
2884 case ARM::MVE_VSTRB16:
2885 case ARM::MVE_VSTRB32:
2886 case ARM::MVE_VSTRH32:
2887 case ARM::MVE_VSTRBU8:
2888 case ARM::MVE_VSTRHU16:
2889 case ARM::MVE_VSTRWU32:
2891 case ARM::t2LDRHi12:
2892 case ARM::t2LDRSHi8:
2893 case ARM::t2LDRSHi12:
2895 case ARM::t2LDRBi12:
2896 case ARM::t2LDRSBi8:
2897 case ARM::t2LDRSBi12:
2899 case ARM::t2STRBi12:
2901 case ARM::t2STRHi12:
2903 case ARM::MVE_VLDRBS16_post:
2904 case ARM::MVE_VLDRBS32_post:
2905 case ARM::MVE_VLDRBU16_post:
2906 case ARM::MVE_VLDRBU32_post:
2907 case ARM::MVE_VLDRHS32_post:
2908 case ARM::MVE_VLDRHU32_post:
2909 case ARM::MVE_VLDRBU8_post:
2910 case ARM::MVE_VLDRHU16_post:
2911 case ARM::MVE_VLDRWU32_post:
2912 case ARM::MVE_VSTRB16_post:
2913 case ARM::MVE_VSTRB32_post:
2914 case ARM::MVE_VSTRH32_post:
2915 case ARM::MVE_VSTRBU8_post:
2916 case ARM::MVE_VSTRHU16_post:
2917 case ARM::MVE_VSTRWU32_post:
2918 case ARM::MVE_VLDRBS16_pre:
2919 case ARM::MVE_VLDRBS32_pre:
2920 case ARM::MVE_VLDRBU16_pre:
2921 case ARM::MVE_VLDRBU32_pre:
2922 case ARM::MVE_VLDRHS32_pre:
2923 case ARM::MVE_VLDRHU32_pre:
2924 case ARM::MVE_VLDRBU8_pre:
2925 case ARM::MVE_VLDRHU16_pre:
2926 case ARM::MVE_VLDRWU32_pre:
2927 case ARM::MVE_VSTRB16_pre:
2928 case ARM::MVE_VSTRB32_pre:
2929 case ARM::MVE_VSTRH32_pre:
2930 case ARM::MVE_VSTRBU8_pre:
2931 case ARM::MVE_VSTRHU16_pre:
2932 case ARM::MVE_VSTRWU32_pre:
2939 switch (
MI.getOpcode()) {
2940 case ARM::MVE_VLDRBS16_post:
2941 case ARM::MVE_VLDRBS32_post:
2942 case ARM::MVE_VLDRBU16_post:
2943 case ARM::MVE_VLDRBU32_post:
2944 case ARM::MVE_VLDRHS32_post:
2945 case ARM::MVE_VLDRHU32_post:
2946 case ARM::MVE_VLDRBU8_post:
2947 case ARM::MVE_VLDRHU16_post:
2948 case ARM::MVE_VLDRWU32_post:
2949 case ARM::MVE_VSTRB16_post:
2950 case ARM::MVE_VSTRB32_post:
2951 case ARM::MVE_VSTRH32_post:
2952 case ARM::MVE_VSTRBU8_post:
2953 case ARM::MVE_VSTRHU16_post:
2954 case ARM::MVE_VSTRWU32_post:
2961 switch (
MI.getOpcode()) {
2962 case ARM::MVE_VLDRBS16_pre:
2963 case ARM::MVE_VLDRBS32_pre:
2964 case ARM::MVE_VLDRBU16_pre:
2965 case ARM::MVE_VLDRBU32_pre:
2966 case ARM::MVE_VLDRHS32_pre:
2967 case ARM::MVE_VLDRHU32_pre:
2968 case ARM::MVE_VLDRBU8_pre:
2969 case ARM::MVE_VLDRHU16_pre:
2970 case ARM::MVE_VLDRWU32_pre:
2971 case ARM::MVE_VSTRB16_pre:
2972 case ARM::MVE_VSTRB32_pre:
2973 case ARM::MVE_VSTRH32_pre:
2974 case ARM::MVE_VSTRBU8_pre:
2975 case ARM::MVE_VSTRHU16_pre:
2976 case ARM::MVE_VSTRWU32_pre:
2989 int &CodesizeEstimate) {
2998 CodesizeEstimate += 1;
2999 return Imm < 0 && -Imm < ((1 << 8) * 1);
3012 MI->getOperand(BaseOp).setReg(NewBaseReg);
3018 MRI.constrainRegClass(NewBaseReg, TRC);
3020 int OldOffset =
MI->getOperand(BaseOp + 1).getImm();
3022 MI->getOperand(BaseOp + 1).setImm(OldOffset -
Offset);
3024 unsigned ConvOpcode;
3025 switch (
MI->getOpcode()) {
3026 case ARM::t2LDRHi12:
3027 ConvOpcode = ARM::t2LDRHi8;
3029 case ARM::t2LDRSHi12:
3030 ConvOpcode = ARM::t2LDRSHi8;
3032 case ARM::t2LDRBi12:
3033 ConvOpcode = ARM::t2LDRBi8;
3035 case ARM::t2LDRSBi12:
3036 ConvOpcode = ARM::t2LDRSBi8;
3038 case ARM::t2STRHi12:
3039 ConvOpcode = ARM::t2STRHi8;
3041 case ARM::t2STRBi12:
3042 ConvOpcode = ARM::t2STRBi8;
3048 "Illegal Address Immediate after convert!");
3052 .
add(
MI->getOperand(0))
3053 .
add(
MI->getOperand(1))
3055 .
add(
MI->getOperand(3))
3056 .
add(
MI->getOperand(4))
3058 MI->eraseFromParent();
3075 MRI.constrainRegClass(NewReg, TRC);
3078 MRI.constrainRegClass(
MI->getOperand(1).getReg(), TRC);
3088 .
add(
MI->getOperand(0))
3089 .
add(
MI->getOperand(1))
3091 .
add(
MI->getOperand(3))
3092 .
add(
MI->getOperand(4))
3093 .
add(
MI->getOperand(5))
3096 if (
MI->mayLoad()) {
3098 .
add(
MI->getOperand(0))
3100 .
add(
MI->getOperand(1))
3102 .
add(
MI->getOperand(3))
3103 .
add(
MI->getOperand(4))
3108 .
add(
MI->getOperand(0))
3109 .
add(
MI->getOperand(1))
3111 .
add(
MI->getOperand(3))
3112 .
add(
MI->getOperand(4))
3136bool ARMPreAllocLoadStoreOpt::DistributeIncrements(
Register Base) {
3139 MachineInstr *BaseAccess =
nullptr;
3140 MachineInstr *PrePostInc =
nullptr;
3145 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3146 for (
auto &Use :
MRI->use_nodbg_instructions(
Base)) {
3156 if (!
Use.getOperand(BaseOp).isReg() ||
3157 Use.getOperand(BaseOp).getReg() !=
Base)
3161 else if (
Use.getOperand(BaseOp + 1).getImm() == 0)
3164 OtherAccesses.
insert(&Use);
3167 int IncrementOffset;
3169 if (BaseAccess && Increment) {
3173 if (
Increment->definesRegister(ARM::CPSR,
nullptr) ||
3177 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on VirtualReg "
3178 <<
Base.virtRegIndex() <<
"\n");
3182 for (MachineInstr &Use :
3183 MRI->use_nodbg_instructions(
Increment->getOperand(0).getReg())) {
3184 if (&Use == BaseAccess || (
Use.getOpcode() != TargetOpcode::PHI &&
3186 LLVM_DEBUG(
dbgs() <<
" BaseAccess doesn't dominate use of increment\n");
3196 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on postinc\n");
3200 else if (PrePostInc) {
3208 LLVM_DEBUG(
dbgs() <<
"\nAttempting to distribute increments on already "
3209 <<
"indexed VirtualReg " <<
Base.virtRegIndex() <<
"\n");
3212 BaseAccess = PrePostInc;
3226 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3227 int CodesizeEstimate = -1;
3228 for (
auto *Use : OtherAccesses) {
3230 SuccessorAccesses.
insert(Use);
3233 Use->getOperand(BaseOp + 1).getImm() -
3235 TII, CodesizeEstimate)) {
3236 LLVM_DEBUG(
dbgs() <<
" Illegal addressing mode immediate on use\n");
3239 }
else if (!DT->
dominates(Use, BaseAccess)) {
3241 dbgs() <<
" Unknown dominance relation between Base and Use\n");
3245 if (STI->
hasMinSize() && CodesizeEstimate > 0) {
3246 LLVM_DEBUG(
dbgs() <<
" Expected to grow instructions under minsize\n");
3254 NewBaseReg =
Increment->getOperand(0).getReg();
3255 MachineInstr *BaseAccessPost =
3259 (void)BaseAccessPost;
3263 for (
auto *Use : SuccessorAccesses) {
3271 for (MachineOperand &
Op :
MRI->use_nodbg_operands(NewBaseReg))
3272 Op.setIsKill(
false);
3276bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3278 SmallSetVector<Register, 4> Visited;
3279 for (
auto &
MBB : *MF) {
3280 for (
auto &
MI :
MBB) {
3282 if (BaseOp == -1 || !
MI.getOperand(BaseOp).isReg())
3286 if (!
Base.isVirtual())
3293 for (
auto Base : Visited)
3302 return new ARMPreAllocLoadStoreOpt();
3303 return new ARMLoadStoreOpt();
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoadSingle(unsigned Opc)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for an increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static int getMemoryOpOffset(const MachineInstr &MI)
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isThumb2Function() const
bool isThumbFunction() const
bool shouldSignReturnAddress() const
const ARMBaseInstrInfo * getInstrInfo() const override
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
Align getDualLoadStoreAlignment() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A parsed version of the target data layout string, and methods for querying it.
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnMachineFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
StringRef - Represent a constant reference to a string, i.e.
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ Define
Register definition.
@ Kill
The last use of a register.
@ CE
Windows NT (Windows on ARM)
This namespace contains all of the command line option processing machinery.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
NodeAddr< UseNode * > Use
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
unsigned getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned M1(unsigned Val)
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
This struct is a compact representation of a valid (non-zero power of two) alignment.