45#define DEBUG_TYPE "a15-sd-optimizer"
54 StringRef getPassName()
const override {
return "ARM A15 S->D optimizer"; }
78 const DebugLoc &
DL,
unsigned Ssub0,
unsigned Ssub1);
88 unsigned Lane,
unsigned ToInsert);
100 unsigned getDPRLaneFromSPR(
unsigned SReg);
115 unsigned getPrefSPRLane(
unsigned SReg);
125 std::map<MachineInstr*, unsigned> Replacements;
126 std::set<MachineInstr *> DeadInstr;
128 char A15SDOptimizer::ID = 0;
139 return MRI->getRegClass(
Reg)->hasSuperClassEq(TRC);
144unsigned A15SDOptimizer::getDPRLaneFromSPR(
unsigned SReg) {
146 TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
154unsigned A15SDOptimizer::getPrefSPRLane(
unsigned SReg) {
155 if (!Register::isVirtualRegister(SReg))
156 return getDPRLaneFromSPR(SReg);
158 MachineInstr *
MI =
MRI->getVRegDef(SReg);
159 if (!
MI)
return ARM::ssub_0;
160 MachineOperand *MO =
MI->findRegisterDefOperand(SReg,
nullptr);
161 if (!MO)
return ARM::ssub_0;
162 assert(MO->
isReg() &&
"Non-register operand found!");
164 if (
MI->isCopy() && usesRegClass(
MI->getOperand(1),
165 &ARM::SPRRegClass)) {
166 SReg =
MI->getOperand(1).getReg();
169 if (Register::isVirtualRegister(SReg)) {
170 if (MO->
getSubReg() == ARM::ssub_1)
return ARM::ssub_1;
173 return getDPRLaneFromSPR(SReg);
178void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *
MI) {
179 SmallVector<MachineInstr *, 8> Front;
180 DeadInstr.insert(
MI);
185 while (Front.
size() != 0) {
190 for (MachineOperand &MO :
MI->operands()) {
196 MachineOperand *
Op =
MI->findRegisterDefOperand(
Reg,
nullptr);
201 MachineInstr *
Def =
Op->getParent();
205 if (DeadInstr.find(Def) != DeadInstr.end())
212 for (MachineOperand &MODef :
Def->operands()) {
213 if ((!MODef.isReg()) || (!MODef.isDef()))
220 for (MachineInstr &Use :
MRI->use_instructions(
Reg)) {
224 if (DeadInstr.find(&Use) == DeadInstr.end()) {
234 DeadInstr.insert(Def);
241unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *
MI) {
243 return optimizeAllLanesPattern(
MI,
MI->getOperand(1).getReg());
246 if (
MI->isInsertSubreg()) {
251 MachineInstr *DPRMI =
MRI->getVRegDef(
MI->getOperand(1).getReg());
252 MachineInstr *SPRMI =
MRI->getVRegDef(
MI->getOperand(2).getReg());
254 if (DPRMI && SPRMI) {
256 MachineInstr *ECDef = elideCopies(DPRMI);
261 MachineInstr *
EC = elideCopies(SPRMI);
263 if (EC &&
EC->isCopy() &&
264 EC->getOperand(1).getSubReg() == ARM::ssub_0) {
270 const TargetRegisterClass *TRC =
271 MRI->getRegClass(
MI->getOperand(1).getReg());
275 eraseInstrWithNoUses(
MI);
280 return optimizeAllLanesPattern(
MI,
MI->getOperand(2).getReg());
284 return optimizeAllLanesPattern(
MI,
MI->getOperand(0).getReg());
287 if (
MI->isRegSequence() && usesRegClass(
MI->getOperand(1),
288 &ARM::SPRRegClass)) {
291 unsigned NumImplicit = 0, NumTotal = 0;
292 unsigned NonImplicitReg = ~0
U;
303 MachineInstr *
Def =
MRI->getVRegDef(OpReg);
306 if (
Def->isImplicitDef())
309 NonImplicitReg = MO.
getReg();
312 if (NumImplicit == NumTotal - 1)
313 return optimizeAllLanesPattern(
MI, NonImplicitReg);
315 return optimizeAllLanesPattern(
MI,
MI->getOperand(0).getReg());
323bool A15SDOptimizer::hasPartialWrite(MachineInstr *
MI) {
326 if (
MI->isCopy() && usesRegClass(
MI->getOperand(1), &ARM::SPRRegClass))
329 if (
MI->isInsertSubreg() && usesRegClass(
MI->getOperand(2),
333 if (
MI->isRegSequence() && usesRegClass(
MI->getOperand(1), &ARM::SPRRegClass))
341MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *
MI) {
342 if (!
MI->isFullCopy())
344 if (!
MI->getOperand(1).getReg().isVirtual())
346 MachineInstr *
Def =
MRI->getVRegDef(
MI->getOperand(1).getReg());
349 return elideCopies(Def);
354void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *
MI,
355 SmallVectorImpl<MachineInstr*> &Outs) {
358 std::set<MachineInstr *> Reached;
359 SmallVector<MachineInstr *, 8> Front;
361 while (Front.
size() != 0) {
365 if (!Reached.insert(
MI).second)
368 for (
unsigned I = 1,
E =
MI->getNumOperands();
I !=
E;
I += 2) {
373 MachineInstr *NewMI =
MRI->getVRegDef(
Reg);
378 }
else if (
MI->isFullCopy()) {
379 if (!
MI->getOperand(1).getReg().isVirtual())
381 MachineInstr *NewMI =
MRI->getVRegDef(
MI->getOperand(1).getReg());
394SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *
MI) {
395 if (
MI->isCopyLike() ||
MI->isInsertSubreg() ||
MI->isRegSequence() ||
397 return SmallVector<unsigned, 8>();
399 SmallVector<unsigned, 8> Defs;
400 for (MachineOperand &MO :
MI->operands()) {
403 if (!usesRegClass(MO, &ARM::DPRRegClass) &&
404 !usesRegClass(MO, &ARM::QPRRegClass) &&
405 !usesRegClass(MO, &ARM::DPairRegClass))
414unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &
MBB,
417 unsigned Lane,
bool QPR) {
419 MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass);
421 TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
430unsigned A15SDOptimizer::createExtractSubreg(
432 const DebugLoc &
DL,
unsigned DReg,
unsigned Lane,
433 const TargetRegisterClass *TRC) {
438 TII->get(TargetOpcode::COPY), Out)
445unsigned A15SDOptimizer::createRegSequence(
447 const DebugLoc &
DL,
unsigned Reg1,
unsigned Reg2) {
448 Register Out =
MRI->createVirtualRegister(&ARM::QPRRegClass);
452 TII->get(TargetOpcode::REG_SEQUENCE), Out)
462unsigned A15SDOptimizer::createVExt(MachineBasicBlock &
MBB,
466 Register Out =
MRI->createVirtualRegister(&ARM::DPRRegClass);
475unsigned A15SDOptimizer::createInsertSubreg(
477 const DebugLoc &
DL,
unsigned DReg,
unsigned Lane,
unsigned ToInsert) {
478 Register Out =
MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
482 TII->get(TargetOpcode::INSERT_SUBREG), Out)
491A15SDOptimizer::createImplicitDef(MachineBasicBlock &
MBB,
494 Register Out =
MRI->createVirtualRegister(&ARM::DPRRegClass);
498 TII->get(TargetOpcode::IMPLICIT_DEF), Out);
506A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *
MI,
unsigned Reg) {
509 MachineBasicBlock &
MBB = *
MI->getParent();
515 if (
MRI->getRegClass(
Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
516 MRI->getRegClass(
Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
517 unsigned DSub0 = createExtractSubreg(
MBB, InsertPt,
DL,
Reg,
518 ARM::dsub_0, &ARM::DPRRegClass);
519 unsigned DSub1 = createExtractSubreg(
MBB, InsertPt,
DL,
Reg,
520 ARM::dsub_1, &ARM::DPRRegClass);
522 unsigned Out1 = createDupLane(
MBB, InsertPt,
DL, DSub0, 0);
523 unsigned Out2 = createDupLane(
MBB, InsertPt,
DL, DSub0, 1);
524 Out = createVExt(
MBB, InsertPt,
DL, Out1, Out2);
526 unsigned Out3 = createDupLane(
MBB, InsertPt,
DL, DSub1, 0);
527 unsigned Out4 = createDupLane(
MBB, InsertPt,
DL, DSub1, 1);
528 Out2 = createVExt(
MBB, InsertPt,
DL, Out3, Out4);
530 Out = createRegSequence(
MBB, InsertPt,
DL, Out, Out2);
532 }
else if (
MRI->getRegClass(
Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
533 unsigned Out1 = createDupLane(
MBB, InsertPt,
DL,
Reg, 0);
534 unsigned Out2 = createDupLane(
MBB, InsertPt,
DL,
Reg, 1);
535 Out = createVExt(
MBB, InsertPt,
DL, Out1, Out2);
538 assert(
MRI->getRegClass(
Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
539 "Found unexpected regclass!");
541 unsigned PrefLane = getPrefSPRLane(
Reg);
544 case ARM::ssub_0: Lane = 0;
break;
545 case ARM::ssub_1: Lane = 1;
break;
550 bool UsesQPR = usesRegClass(
MI->getOperand(0), &ARM::QPRRegClass) ||
551 usesRegClass(
MI->getOperand(0), &ARM::DPairRegClass);
553 Out = createImplicitDef(
MBB, InsertPt,
DL);
554 Out = createInsertSubreg(
MBB, InsertPt,
DL, Out, PrefLane,
Reg);
555 Out = createDupLane(
MBB, InsertPt,
DL, Out, Lane, UsesQPR);
556 eraseInstrWithNoUses(
MI);
561bool A15SDOptimizer::runOnInstruction(MachineInstr *
MI) {
592 SmallVector<unsigned, 8> Defs = getReadDPRs(
MI);
595 for (
unsigned I : Defs) {
600 SmallVector<MachineInstr *, 8> DefSrcs;
601 if (!Register::isVirtualRegister(
I))
603 MachineInstr *
Def =
MRI->getVRegDef(
I);
607 elideCopiesAndPHIs(Def, DefSrcs);
609 for (MachineInstr *
MI : DefSrcs) {
612 if (Replacements.find(
MI) != Replacements.end())
616 if (!hasPartialWrite(
MI))
620 Register DPRDefReg =
MI->getOperand(0).getReg();
625 unsigned NewReg = optimizeSDPattern(
MI);
629 for (MachineOperand *Use :
Uses) {
635 MRI->constrainRegClass(NewReg,
MRI->getRegClass(
Use->getReg()));
639 Use->substVirtReg(NewReg, 0, *
TRI);
642 Replacements[
MI] = NewReg;
648bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
652 const ARMSubtarget &STI = Fn.
getSubtarget<ARMSubtarget>();
655 if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
666 Replacements.clear();
668 for (MachineBasicBlock &
MBB : Fn) {
669 for (MachineInstr &
MI :
MBB) {
674 for (MachineInstr *
MI : DeadInstr) {
675 MI->eraseFromParent();
682 return new A15SDOptimizer();
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Remove Loads Into Fake Uses
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
const ARMBaseInstrInfo * getInstrInfo() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool isImplicitDef() const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
NodeAddr< DefNode * > Def
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FunctionPass * createA15SDOptimizerPass()
DWARFExpression::Operation Op
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.