AArch64PostLegalizerLowering.cpp
1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64ExpandImm.h"
25#include "AArch64Subtarget.h"
45#include "llvm/IR/InstrTypes.h"
47#include <optional>
48
49#define GET_GICOMBINER_DEPS
50#include "AArch64GenPostLegalizeGILowering.inc"
51#undef GET_GICOMBINER_DEPS
52
53#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
54
55using namespace llvm;
56using namespace MIPatternMatch;
57using namespace AArch64GISelUtils;
58
59namespace {
60
61#define GET_GICOMBINER_TYPES
62#include "AArch64GenPostLegalizeGILowering.inc"
63#undef GET_GICOMBINER_TYPES
64
65/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
66///
67/// Used for matching target-supported shuffles before codegen.
68struct ShuffleVectorPseudo {
69 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
70 Register Dst; ///< Destination register.
71 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
72 ShuffleVectorPseudo(unsigned Opc, Register Dst,
73 std::initializer_list<SrcOp> SrcOps)
74 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
75 ShuffleVectorPseudo() = default;
76};
77
78/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
79/// sources of the shuffle are different.
80std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
81 unsigned NumElts) {
82 // Look for the first non-undef element.
83 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
84 if (FirstRealElt == M.end())
85 return std::nullopt;
86
87 // Use APInt to handle overflow when calculating expected element.
88 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
89 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
90
91 // The following shuffle indices must be the successive elements after the
92 // first real element.
93 if (any_of(
94 make_range(std::next(FirstRealElt), M.end()),
95 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
96 return std::nullopt;
97
98 // The index of an EXT is the first element if it is not UNDEF.
99 // Watch out for the beginning UNDEFs. The EXT index should be the expected
100 // value of the first element. E.g.
101 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
102 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
103 // ExpectedElt is the last mask index plus 1.
104 uint64_t Imm = ExpectedElt.getZExtValue();
105 bool ReverseExt = false;
106
107 // There are two different cases that require reversing the input vectors.
108 // For example, for vector <4 x i32> we have the following cases,
109 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
110 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
111 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
112 // to reverse two input vectors.
113 if (Imm < NumElts)
114 ReverseExt = true;
115 else
116 Imm -= NumElts;
117 return std::make_pair(ReverseExt, Imm);
118}
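// A worked example of getExtMask (illustrative sketch): for a <4 x s32>
// shuffle with mask <-1, -1, 7, 0>, the first non-undef element is 7, so the
// mask is treated as <5, 6, 7, 0> and ExpectedElt ends up as 1 after wrapping
// modulo 2*NumElts. Since 1 < NumElts, the result is (ReverseExt = true,
// Imm = 1); matchEXT below then swaps the sources and scales Imm by the
// element size in bytes, producing G_EXT %dst, %v2, %v1, 4.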
119
120/// Helper function for matchINS.
121///
122/// \returns a value when \p M is an ins mask for \p NumInputElements.
123///
124/// First element of the returned pair is true when the produced
125/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
126///
127/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
128std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
129 int NumInputElements) {
130 if (M.size() != static_cast<size_t>(NumInputElements))
131 return std::nullopt;
132 int NumLHSMatch = 0, NumRHSMatch = 0;
133 int LastLHSMismatch = -1, LastRHSMismatch = -1;
134 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
135 if (M[Idx] == -1) {
136 ++NumLHSMatch;
137 ++NumRHSMatch;
138 continue;
139 }
140 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
141 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
142 }
143 const int NumNeededToMatch = NumInputElements - 1;
144 if (NumLHSMatch == NumNeededToMatch)
145 return std::make_pair(true, LastLHSMismatch);
146 if (NumRHSMatch == NumNeededToMatch)
147 return std::make_pair(false, LastRHSMismatch);
148 return std::nullopt;
149}
150
151/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
152/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
153bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
154 ShuffleVectorPseudo &MatchInfo) {
155 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
156 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
157 Register Dst = MI.getOperand(0).getReg();
158 Register Src = MI.getOperand(1).getReg();
159 LLT Ty = MRI.getType(Dst);
160 unsigned EltSize = Ty.getScalarSizeInBits();
161
162 // Element size for a rev cannot be 64.
163 if (EltSize == 64)
164 return false;
165
166 unsigned NumElts = Ty.getNumElements();
167
168 // Try to produce a G_REV instruction
169 for (unsigned LaneSize : {64U, 32U, 16U}) {
170 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
171 unsigned Opcode;
172 if (LaneSize == 64U)
173 Opcode = AArch64::G_REV64;
174 else if (LaneSize == 32U)
175 Opcode = AArch64::G_REV32;
176 else
177 Opcode = AArch64::G_REV16;
178
179 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
180 return true;
181 }
182 }
183
184 return false;
185}
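// Illustrative example: a <4 x s16> G_SHUFFLE_VECTOR with mask <1, 0, 3, 2>
// reverses the 16-bit elements within each 32-bit block, i.e. it is a REV mask
// for LaneSize 32, so this match produces G_REV32 %dst, %src.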
186
187/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
188/// a G_TRN1 or G_TRN2 instruction.
189bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
190 ShuffleVectorPseudo &MatchInfo) {
191 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
192 unsigned WhichResult;
193 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
194 Register Dst = MI.getOperand(0).getReg();
195 unsigned NumElts = MRI.getType(Dst).getNumElements();
196 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
197 return false;
198 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
199 Register V1 = MI.getOperand(1).getReg();
200 Register V2 = MI.getOperand(2).getReg();
201 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
202 return true;
203}
204
205/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
206/// a G_UZP1 or G_UZP2 instruction.
207///
208/// \param [in] MI - The shuffle vector instruction.
209/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
210bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
211 ShuffleVectorPseudo &MatchInfo) {
212 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
213 unsigned WhichResult;
214 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
215 Register Dst = MI.getOperand(0).getReg();
216 unsigned NumElts = MRI.getType(Dst).getNumElements();
217 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
218 return false;
219 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
220 Register V1 = MI.getOperand(1).getReg();
221 Register V2 = MI.getOperand(2).getReg();
222 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
223 return true;
224}
225
226bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
227 ShuffleVectorPseudo &MatchInfo) {
228 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
229 unsigned WhichResult;
230 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
231 Register Dst = MI.getOperand(0).getReg();
232 unsigned NumElts = MRI.getType(Dst).getNumElements();
233 if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
234 return false;
235 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
236 Register V1 = MI.getOperand(1).getReg();
237 Register V2 = MI.getOperand(2).getReg();
238 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
239 return true;
240}
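// Illustrative example: with two <4 x s32> sources, the mask <0, 4, 1, 5>
// interleaves the low halves and matches G_ZIP1, while <2, 6, 3, 7>
// interleaves the high halves and matches G_ZIP2.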
241
242/// Helper function for matchDup.
243bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
244 MachineRegisterInfo &MRI,
245 ShuffleVectorPseudo &MatchInfo) {
246 if (Lane != 0)
247 return false;
248
249 // Try to match a vector splat operation into a dup instruction.
250 // We're looking for this pattern:
251 //
252 // %scalar:gpr(s64) = COPY $x0
253 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
254 // %cst0:gpr(s32) = G_CONSTANT i32 0
255 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
256 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
257 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
258 // %zerovec(<2 x s32>)
259 //
260 // ...into:
261 // %splat = G_DUP %scalar
262
263 // Begin matching the insert.
264 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
265 MI.getOperand(1).getReg(), MRI);
266 if (!InsMI)
267 return false;
268 // Match the undef vector operand.
269 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
270 MRI))
271 return false;
272
273 // Match the index constant 0.
274 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
275 return false;
276
277 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
278 {InsMI->getOperand(2).getReg()});
279 return true;
280}
281
282/// Helper function for matchDup.
283bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
284 MachineRegisterInfo &MRI,
285 ShuffleVectorPseudo &MatchInfo) {
286 assert(Lane >= 0 && "Expected positive lane?");
287 int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
288 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
289 // lane's definition directly.
290 auto *BuildVecMI =
291 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
292 MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
293 // If Lane >= NumElements then it points into the RHS; adjust it to index the RHS.
294 if (NumElements <= Lane)
295 Lane -= NumElements;
296
297 if (!BuildVecMI)
298 return false;
299 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
300 MatchInfo =
301 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
302 return true;
303}
304
305bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
306 ShuffleVectorPseudo &MatchInfo) {
307 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
308 auto MaybeLane = getSplatIndex(MI);
309 if (!MaybeLane)
310 return false;
311 int Lane = *MaybeLane;
312 // If this is undef splat, generate it via "just" vdup, if possible.
313 if (Lane < 0)
314 Lane = 0;
315 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
316 return true;
317 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
318 return true;
319 return false;
320}
321
322// Check if an EXT instruction can handle the shuffle mask when the vector
323// sources of the shuffle are the same.
324bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
325 unsigned NumElts = Ty.getNumElements();
326
327 // Assume that the first shuffle index is not UNDEF. Fail if it is.
328 if (M[0] < 0)
329 return false;
330
331 // If this is a VEXT shuffle, the immediate value is the index of the first
332 // element. The other shuffle indices must be the successive elements after
333 // the first one.
334 unsigned ExpectedElt = M[0];
335 for (unsigned I = 1; I < NumElts; ++I) {
336 // Increment the expected index. If it wraps around, just follow it
337 // back to index zero and keep going.
338 ++ExpectedElt;
339 if (ExpectedElt == NumElts)
340 ExpectedElt = 0;
341
342 if (M[I] < 0)
343 continue; // Ignore UNDEF indices.
344 if (ExpectedElt != static_cast<unsigned>(M[I]))
345 return false;
346 }
347
348 return true;
349}
350
351bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
352 ShuffleVectorPseudo &MatchInfo) {
353 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
354 Register Dst = MI.getOperand(0).getReg();
355 LLT DstTy = MRI.getType(Dst);
356 Register V1 = MI.getOperand(1).getReg();
357 Register V2 = MI.getOperand(2).getReg();
358 auto Mask = MI.getOperand(3).getShuffleMask();
359 uint64_t Imm;
360 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
361 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
362
363 if (!ExtInfo) {
364 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
365 !isSingletonExtMask(Mask, DstTy))
366 return false;
367
368 Imm = Mask[0] * ExtFactor;
369 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
370 return true;
371 }
372 bool ReverseExt;
373 std::tie(ReverseExt, Imm) = *ExtInfo;
374 if (ReverseExt)
375 std::swap(V1, V2);
376 Imm *= ExtFactor;
377 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
378 return true;
379}
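// Illustrative example of the single-source path: a <4 x s32>
// G_SHUFFLE_VECTOR %v1, %undef with mask <1, 2, 3, 0> is a singleton EXT mask
// starting at element 1, so with ExtFactor = 4 bytes it becomes
// G_EXT %dst, %v1, %v1, 4.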
380
381/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
382/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
383void applyShuffleVectorPseudo(MachineInstr &MI,
384 ShuffleVectorPseudo &MatchInfo) {
385 MachineIRBuilder MIRBuilder(MI);
386 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
387 MI.eraseFromParent();
388}
389
390/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
391/// Special-cased because the constant operand must be emitted as a G_CONSTANT
392/// for the imported tablegen patterns to work.
393void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
394 MachineIRBuilder MIRBuilder(MI);
395 if (MatchInfo.SrcOps[2].getImm() == 0)
396 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
397 else {
398 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
399 auto Cst =
400 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
401 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
402 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
403 }
404 MI.eraseFromParent();
405}
406
407void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
408 Register Dst = MI.getOperand(0).getReg();
409 Register Src = MI.getOperand(1).getReg();
410 LLT DstTy = MRI.getType(Dst);
411 assert(DstTy.getSizeInBits() == 128 &&
412 "Expected 128bit vector in applyFullRev");
413 MachineIRBuilder MIRBuilder(MI);
414 auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
415 auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
416 MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
417 MI.eraseFromParent();
418}
419
420bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
421 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
422
423 auto ValAndVReg =
424 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
425 return !ValAndVReg;
426}
427
428void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
429 MachineIRBuilder &Builder) {
430 auto &Insert = cast<GInsertVectorElement>(MI);
431 Builder.setInstrAndDebugLoc(Insert);
432
433 Register Offset = Insert.getIndexReg();
434 LLT VecTy = MRI.getType(Insert.getReg(0));
435 LLT EltTy = MRI.getType(Insert.getElementReg());
436 LLT IdxTy = MRI.getType(Insert.getIndexReg());
437
438 if (VecTy.isScalableVector())
439 return;
440
441 // Create a stack slot and store the vector into it
442 MachineFunction &MF = Builder.getMF();
443 Align Alignment(
444 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
445 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
446 Alignment, false);
447 LLT FramePtrTy = LLT::pointer(0, 64);
448 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
449 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
450
451 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
452
453 // Get the pointer to the element, and be sure not to hit undefined behavior
454 // if the index is out of bounds.
455 assert(isPowerOf2_64(VecTy.getNumElements()) &&
456 "Expected a power-2 vector size");
457 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
458 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
459 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
460 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
461 Register EltPtr =
462 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
463 .getReg(0);
464
465 // Write the inserted element
466 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
467 // Reload the whole vector.
468 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
469 Insert.eraseFromParent();
470}
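// Rough shape of the sequence built above (an illustrative MIR-like sketch):
//   %slot:_(p0)   = G_FRAME_INDEX %stack.N
//   G_STORE %vec(<n x sT>), %slot             ; spill the whole vector
//   %clamped      = G_AND %idx, (n - 1)       ; clamp the index into range
//   %byteoff      = G_MUL %clamped, sizeof(sT)
//   %eltptr:_(p0) = G_PTR_ADD %slot, %byteoff
//   G_STORE %elt, %eltptr                     ; overwrite the selected element
//   %dst(<n x sT>) = G_LOAD %slot             ; reload the updated vector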
471
472/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
473/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
474///
475/// e.g.
476/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
477///
478/// Can be represented as
479///
480/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
481/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
482///
483bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
484 std::tuple<Register, int, Register, int> &MatchInfo) {
485 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
486 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
487 Register Dst = MI.getOperand(0).getReg();
488 int NumElts = MRI.getType(Dst).getNumElements();
489 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
490 if (!DstIsLeftAndDstLane)
491 return false;
492 bool DstIsLeft;
493 int DstLane;
494 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
495 Register Left = MI.getOperand(1).getReg();
496 Register Right = MI.getOperand(2).getReg();
497 Register DstVec = DstIsLeft ? Left : Right;
498 Register SrcVec = Left;
499
500 int SrcLane = ShuffleMask[DstLane];
501 if (SrcLane >= NumElts) {
502 SrcVec = Right;
503 SrcLane -= NumElts;
504 }
505
506 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
507 return true;
508}
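// Worked example (illustrative): with <4 x s32> operands, the mask <0, 5, 2, 3>
// matches every LHS lane except lane 1, and element 5 names lane 1 of the RHS,
// so isINSMask returns (true, 1) and the shuffle is rewritten as
//   %extract = G_EXTRACT_VECTOR_ELT %right, 1
//   %dst     = G_INSERT_VECTOR_ELT %left, %extract, 1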
509
510void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
511 MachineIRBuilder &Builder,
512 std::tuple<Register, int, Register, int> &MatchInfo) {
513 Builder.setInstrAndDebugLoc(MI);
514 Register Dst = MI.getOperand(0).getReg();
515 auto ScalarTy = MRI.getType(Dst).getElementType();
516 Register DstVec, SrcVec;
517 int DstLane, SrcLane;
518 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
519 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
520 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
521 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
522 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
523 MI.eraseFromParent();
524}
525
526/// isVShiftRImm - Check if this is a valid vector for the immediate
527/// operand of a vector shift right operation. The value must be in the range:
528/// 1 <= Value <= ElementBits for a right shift.
529static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
530 int64_t &Cnt) {
531 assert(Ty.isVector() && "vector shift count is not a vector type");
532 MachineInstr *MI = MRI.getVRegDef(Reg);
533 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
534 if (!Cst)
535 return false;
536 Cnt = *Cst;
537 int64_t ElementBits = Ty.getScalarSizeInBits();
538 return Cnt >= 1 && Cnt <= ElementBits;
539}
540
541/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
542bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
543 int64_t &Imm) {
544 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
545 MI.getOpcode() == TargetOpcode::G_LSHR);
546 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
547 if (!Ty.isVector())
548 return false;
549 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
550}
551
552void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
553 int64_t &Imm) {
554 unsigned Opc = MI.getOpcode();
555 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
556 unsigned NewOpc =
557 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
558 MachineIRBuilder MIB(MI);
559 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
560 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
561 MI.eraseFromParent();
562}
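// Illustrative example: a <4 x s32> G_LSHR whose shift amount is a splat of
// the constant 3 becomes G_VLSHR %dst, %src, 3 (the 3 is emitted as a scalar
// s32 G_CONSTANT); a G_ASHR is rewritten to G_VASHR in the same way.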
563
564/// Determine if it is possible to modify the \p RHS and predicate \p P of a
565/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
566///
567/// \returns A pair containing the updated immediate and predicate which may
568/// be used to optimize the instruction.
569///
570/// \note This assumes that the comparison has been legalized.
571std::optional<std::pair<uint64_t, CmpInst::Predicate>>
572tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
573 const MachineRegisterInfo &MRI) {
574 const auto &Ty = MRI.getType(RHS);
575 if (Ty.isVector())
576 return std::nullopt;
577 unsigned Size = Ty.getSizeInBits();
578 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
579
580 // If the RHS is not a constant, or the RHS is already a valid arithmetic
581 // immediate, then there is nothing to change.
582 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
583 if (!ValAndVReg)
584 return std::nullopt;
585 uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
586 uint64_t C = OriginalC;
587 if (isLegalArithImmed(C))
588 return std::nullopt;
589
590 // We have a non-arithmetic immediate. Check if adjusting the immediate and
591 // adjusting the predicate will result in a legal arithmetic immediate.
592 switch (P) {
593 default:
594 return std::nullopt;
595 case CmpInst::ICMP_SLT:
596 case CmpInst::ICMP_SGE:
597 // Check for
598 //
599 // x slt c => x sle c - 1
600 // x sge c => x sgt c - 1
601 //
602 // When c is not the smallest possible negative number.
603 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
604 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
605 return std::nullopt;
606 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
607 C -= 1;
608 break;
609 case CmpInst::ICMP_ULT:
610 case CmpInst::ICMP_UGE:
611 // Check for
612 //
613 // x ult c => x ule c - 1
614 // x uge c => x ugt c - 1
615 //
616 // When c is not zero.
617 assert(C != 0 && "C should not be zero here!");
618 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
619 C -= 1;
620 break;
621 case CmpInst::ICMP_SLE:
622 case CmpInst::ICMP_SGT:
623 // Check for
624 //
625 // x sle c => x slt c + 1
626 // x sgt c => s sge c + 1
627 //
628 // When c is not the largest possible signed integer.
629 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
630 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
631 return std::nullopt;
632 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
633 C += 1;
634 break;
635 case CmpInst::ICMP_ULE:
636 case CmpInst::ICMP_UGT:
637 // Check for
638 //
639 // x ule c => x ult c + 1
640 // x ugt c => s uge c + 1
641 //
642 // When c is not the largest possible unsigned integer.
643 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
644 (Size == 64 && C == UINT64_MAX))
645 return std::nullopt;
646 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
647 C += 1;
648 break;
649 }
650
651 // Check if the new constant is valid, and return the updated constant and
652 // predicate if it is.
653 if (Size == 32)
654 C = static_cast<uint32_t>(C);
655 if (isLegalArithImmed(C))
656 return {{C, P}};
657
658 auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
659 SmallVector<AArch64_IMM::ImmInsnModel> Insn;
660 AArch64_IMM::expandMOVImm(Imm, 32, Insn);
661 return Insn.size();
662 };
663
664 if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
665 return {{C, P}};
666
667 return std::nullopt;
668}
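// Worked example (illustrative): 4097 is not a legal arithmetic immediate
// (neither a 12-bit value nor a 12-bit value shifted left by 12), so
// "x slt 4097" is rewritten to "x sle 4096", and 4096 (1 << 12) is legal.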
669
670/// Determine whether or not it is possible to update the RHS and predicate of
671/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
672/// immediate.
673///
674/// \p MI - The G_ICMP instruction
675/// \p MatchInfo - The new RHS immediate and predicate on success
676///
677/// See tryAdjustICmpImmAndPred for valid transformations.
678bool matchAdjustICmpImmAndPred(
679 MachineInstr &MI, const MachineRegisterInfo &MRI,
680 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
681 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
682 Register RHS = MI.getOperand(3).getReg();
683 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
684 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
685 MatchInfo = *MaybeNewImmAndPred;
686 return true;
687 }
688 return false;
689}
690
691void applyAdjustICmpImmAndPred(
692 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
693 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
694 MIB.setInstrAndDebugLoc(MI);
695 MachineOperand &RHS = MI.getOperand(3);
696 MachineRegisterInfo &MRI = *MIB.getMRI();
697 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
698 MatchInfo.first);
699 Observer.changingInstr(MI);
700 RHS.setReg(Cst->getOperand(0).getReg());
701 MI.getOperand(1).setPredicate(MatchInfo.second);
702 Observer.changedInstr(MI);
703}
704
705bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
706 std::pair<unsigned, int> &MatchInfo) {
707 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
708 Register Src1Reg = MI.getOperand(1).getReg();
709 const LLT SrcTy = MRI.getType(Src1Reg);
710 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
711
712 auto LaneIdx = getSplatIndex(MI);
713 if (!LaneIdx)
714 return false;
715
716 // The lane idx should be within the first source vector.
717 if (*LaneIdx >= SrcTy.getNumElements())
718 return false;
719
720 if (DstTy != SrcTy)
721 return false;
722
723 LLT ScalarTy = SrcTy.getElementType();
724 unsigned ScalarSize = ScalarTy.getSizeInBits();
725
726 unsigned Opc = 0;
727 switch (SrcTy.getNumElements()) {
728 case 2:
729 if (ScalarSize == 64)
730 Opc = AArch64::G_DUPLANE64;
731 else if (ScalarSize == 32)
732 Opc = AArch64::G_DUPLANE32;
733 break;
734 case 4:
735 if (ScalarSize == 32)
736 Opc = AArch64::G_DUPLANE32;
737 else if (ScalarSize == 16)
738 Opc = AArch64::G_DUPLANE16;
739 break;
740 case 8:
741 if (ScalarSize == 8)
742 Opc = AArch64::G_DUPLANE8;
743 else if (ScalarSize == 16)
744 Opc = AArch64::G_DUPLANE16;
745 break;
746 case 16:
747 if (ScalarSize == 8)
748 Opc = AArch64::G_DUPLANE8;
749 break;
750 default:
751 break;
752 }
753 if (!Opc)
754 return false;
755
756 MatchInfo.first = Opc;
757 MatchInfo.second = *LaneIdx;
758 return true;
759}
760
761void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
762 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
763 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
764 Register Src1Reg = MI.getOperand(1).getReg();
765 const LLT SrcTy = MRI.getType(Src1Reg);
766
767 B.setInstrAndDebugLoc(MI);
768 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
769
770 Register DupSrc = MI.getOperand(1).getReg();
771 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
772 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
773 if (SrcTy.getSizeInBits() == 64) {
774 auto Undef = B.buildUndef(SrcTy);
775 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
776 {Src1Reg, Undef.getReg(0)})
777 .getReg(0);
778 }
779 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
780 MI.eraseFromParent();
781}
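// Illustrative example: a <4 x s32> shuffle that splats lane 1 of its first
// source becomes G_DUPLANE32 %dst, %src, 1. A 64-bit source such as <2 x s32>
// is first widened with G_CONCAT_VECTORS %src, %undef to <4 x s32>, as
// described above.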
782
783bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
784 auto &Unmerge = cast<GUnmerge>(MI);
785 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
786 const LLT SrcTy = MRI.getType(Src1Reg);
787 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
788 return false;
789 return SrcTy.isVector() && !SrcTy.isScalable() &&
790 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
791}
792
793void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
794 MachineIRBuilder &B) {
795 auto &Unmerge = cast<GUnmerge>(MI);
796 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
797 const LLT SrcTy = MRI.getType(Src1Reg);
798 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
799 "Expected a fixed length vector");
800
801 for (int I = 0; I < SrcTy.getNumElements(); ++I)
802 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
803 MI.eraseFromParent();
804}
805
806bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
807 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
808
809 // Later, during selection, we'll try to match imported patterns using
810 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
811 // G_BUILD_VECTORs which could match those patterns.
812 if (isBuildVectorAllOnes(MI, MRI) || isBuildVectorAllZeros(MI, MRI))
813 return false;
814
815 return getAArch64VectorSplat(MI, MRI).has_value();
816}
817
818void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
819 MachineIRBuilder &B) {
820 B.setInstrAndDebugLoc(MI);
821 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
822 {MI.getOperand(1).getReg()});
823 MI.eraseFromParent();
824}
825
826/// \returns how many instructions would be saved by folding a G_ICMP's shift
827/// and/or extension operations.
828unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
829 // No instructions to save if there's more than one use or no uses.
830 if (!MRI.hasOneNonDBGUse(CmpOp))
831 return 0;
832
833 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
834 auto IsSupportedExtend = [&](const MachineInstr &MI) {
835 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
836 return true;
837 if (MI.getOpcode() != TargetOpcode::G_AND)
838 return false;
839 auto ValAndVReg =
840 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
841 if (!ValAndVReg)
842 return false;
843 uint64_t Mask = ValAndVReg->Value.getZExtValue();
844 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
845 };
846
847 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
848 if (IsSupportedExtend(*Def))
849 return 1;
850
851 unsigned Opc = Def->getOpcode();
852 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
853 Opc != TargetOpcode::G_LSHR)
854 return 0;
855
856 auto MaybeShiftAmt =
857 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
858 if (!MaybeShiftAmt)
859 return 0;
860 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
861 MachineInstr *ShiftLHS =
862 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
863
864 // Check if we can fold an extend and a shift.
865 // FIXME: This is duplicated with the selector. (See:
866 // selectArithExtendedRegister)
867 if (IsSupportedExtend(*ShiftLHS))
868 return (ShiftAmt <= 4) ? 2 : 1;
869
870 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
871 if (Ty.isVector())
872 return 0;
873 unsigned ShiftSize = Ty.getSizeInBits();
874 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
875 (ShiftSize == 64 && ShiftAmt <= 63))
876 return 1;
877 return 0;
878}
879
880/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
881/// instruction \p MI.
882bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
883 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
884 // Swap the operands if it would introduce a profitable folding opportunity.
885 // (e.g. a shift + extend).
886 //
887 // For example:
888 // lsl w13, w11, #1
889 // cmp w13, w12
890 // can be turned into:
891 // cmp w12, w11, lsl #1
892
893 // Don't swap if there's a constant on the RHS, because we know we can fold
894 // that.
895 Register RHS = MI.getOperand(3).getReg();
896 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
897 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
898 return false;
899
900 Register LHS = MI.getOperand(2).getReg();
901 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
902 auto GetRegForProfit = [&](Register Reg) {
903 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
904 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
905 };
906
907 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
908 // compare, would we be able to fold more instructions?
909 Register TheLHS = GetRegForProfit(LHS);
910 Register TheRHS = GetRegForProfit(RHS);
911
912 // If the LHS is more likely to give us a folding opportunity, then swap the
913 // LHS and RHS.
914 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
915 getCmpOperandFoldingProfit(TheRHS, MRI));
916}
917
918void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
919 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
920 Register LHS = MI.getOperand(2).getReg();
921 Register RHS = MI.getOperand(3).getReg();
922 Observer.changedInstr(MI);
923 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
924 MI.getOperand(2).setReg(RHS);
925 MI.getOperand(3).setReg(LHS);
926 Observer.changedInstr(MI);
927}
928
929/// \returns a function which builds a vector floating point compare instruction
930/// for a condition code \p CC.
931/// \param [in] NoNans - True if the target has NoNansFPMath.
932std::function<Register(MachineIRBuilder &)>
933getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
934 MachineRegisterInfo &MRI) {
935 LLT DstTy = MRI.getType(LHS);
936 assert(DstTy.isVector() && "Expected vector types only?");
937 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
938 switch (CC) {
939 default:
940 llvm_unreachable("Unexpected condition code!");
941 case AArch64CC::NE:
942 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
943 auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
944 return MIB.buildNot(DstTy, FCmp).getReg(0);
945 };
946 case AArch64CC::EQ:
947 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
948 return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
949 };
950 case AArch64CC::GE:
951 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
952 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
953 };
954 case AArch64CC::GT:
955 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
956 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
957 };
958 case AArch64CC::LS:
959 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
960 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
961 };
962 case AArch64CC::MI:
963 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
964 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
965 };
966 }
967}
968
969/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
970bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
971 MachineIRBuilder &MIB) {
972 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
973 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
974
975 Register Dst = MI.getOperand(0).getReg();
976 LLT DstTy = MRI.getType(Dst);
977 if (!DstTy.isVector() || !ST.hasNEON())
978 return false;
979 Register LHS = MI.getOperand(2).getReg();
980 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
981 if (EltSize == 16 && !ST.hasFullFP16())
982 return false;
983 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
984 return false;
985
986 return true;
987}
988
989/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
990void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
991 MachineIRBuilder &MIB) {
992 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
993 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
994
995 const auto &CmpMI = cast<GFCmp>(MI);
996
997 Register Dst = CmpMI.getReg(0);
998 CmpInst::Predicate Pred = CmpMI.getCond();
999 Register LHS = CmpMI.getLHSReg();
1000 Register RHS = CmpMI.getRHSReg();
1001
1002 LLT DstTy = MRI.getType(Dst);
1003
1004 bool Invert = false;
1005 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1006 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1007 Pred == CmpInst::Predicate::FCMP_UNO) &&
1008 isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
1009 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1010 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1011 // "ord" normally would.
1012 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1013 // thus equivalent to a != a.
1014 RHS = LHS;
1015 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1016 } else
1017 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1018
1019 // Instead of having an apply function, just build here to simplify things.
1020 MIB.setInstrAndDebugLoc(MI);
1021
1022 const bool NoNans =
1023 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1024
1025 auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
1026 Register CmpRes;
1027 if (CC2 == AArch64CC::AL)
1028 CmpRes = Cmp(MIB);
1029 else {
1030 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
1031 auto Cmp2Dst = Cmp2(MIB);
1032 auto Cmp1Dst = Cmp(MIB);
1033 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1034 }
1035 if (Invert)
1036 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1037 MRI.replaceRegWith(Dst, CmpRes);
1038 MI.eraseFromParent();
1039}
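// Illustrative examples of the lowering above: "fcmp oeq %a, %b" on a NEON
// vector becomes G_FCMEQ %a, %b; the canonical NaN checks
// "fcmp ord %a, zeroinitializer" and "fcmp uno %a, zeroinitializer" become
// G_FCMEQ %a, %a and a vector NOT of G_FCMEQ %a, %a, respectively.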
1040
1041// Matches G_BUILD_VECTOR where at least one source operand is not a constant
1042bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1043 auto *GBuildVec = cast<GBuildVector>(&MI);
1044
1045 // Check if the values are all constants
1046 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1047 auto ConstVal =
1048 getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1049
1050 if (!ConstVal.has_value())
1051 return true;
1052 }
1053
1054 return false;
1055}
1056
1057void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1058 MachineIRBuilder &B) {
1059 auto *GBuildVec = cast<GBuildVector>(&MI);
1060 LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1061 Register DstReg = B.buildUndef(DstTy).getReg(0);
1062
1063 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1064 Register SrcReg = GBuildVec->getSourceReg(I);
1065 if (mi_match(SrcReg, MRI, m_GImplicitDef()))
1066 continue;
1067 auto IdxReg = B.buildConstant(LLT::scalar(64), I);
1068 DstReg =
1069 B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1070 }
1071 B.buildCopy(GBuildVec->getReg(0), DstReg);
1072 GBuildVec->eraseFromParent();
1073}
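// Illustrative example: a <4 x s32> G_BUILD_VECTOR %a, %b, %c, %d with at
// least one non-constant source becomes, roughly, a chain of
// G_INSERT_VECTOR_ELT instructions into a G_IMPLICIT_DEF, one insert per
// non-undef source, using constant s64 lane indices 0..3.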
1074
1075bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1076 Register &SrcReg) {
1077 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1078 Register DstReg = MI.getOperand(0).getReg();
1079 if (MRI.getType(DstReg).isVector())
1080 return false;
1081 // Match a store of a truncate.
1082 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1083 return false;
1084 // Only form truncstores for value types of max 64b.
1085 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1086}
1087
1088void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1089 MachineIRBuilder &B, GISelChangeObserver &Observer,
1090 Register &SrcReg) {
1091 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1092 Observer.changingInstr(MI);
1093 MI.getOperand(0).setReg(SrcReg);
1094 Observer.changedInstr(MI);
1095}
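// Illustrative example: G_STORE (G_TRUNC %x:s64 to s32), %ptr is rewritten to
// store %x directly; the memory operand keeps its s32 size, so the result is
// selected as a truncating store.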
1096
1097// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1098// form in the first place for combine opportunities, so any remaining ones
1099// at this stage need to be lowered back.
1100bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1101 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1102 Register DstReg = MI.getOperand(0).getReg();
1103 LLT DstTy = MRI.getType(DstReg);
1104 return DstTy.isVector();
1105}
1106
1107void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1108 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1109 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1110 B.setInstrAndDebugLoc(MI);
1111 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1112 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1113}
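// Illustrative example: a <4 x s32> G_SEXT_INREG %x, 8 is lowered by the
// LegalizerHelper back into a shift pair, roughly
//   %shl = G_SHL %x, splat(24)
//   %dst = G_ASHR %shl, splat(24)
// which the existing vector shift patterns can then select.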
1114
1115/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1116/// => unused, <N x t> = unmerge v
1117bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1118 Register &MatchInfo) {
1119 auto &Unmerge = cast<GUnmerge>(MI);
1120 if (Unmerge.getNumDefs() != 2)
1121 return false;
1122 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1123 return false;
1124
1125 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1126 if (!DstTy.isVector())
1127 return false;
1128
1129 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1130 if (!Ext)
1131 return false;
1132
1133 Register ExtSrc1 = Ext->getOperand(1).getReg();
1134 Register ExtSrc2 = Ext->getOperand(2).getReg();
1135 auto LowestVal =
1136 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1137 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1138 return false;
1139
1140 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1141 return false;
1142
1143 MatchInfo = ExtSrc1;
1144 return true;
1145}
1146
1147void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1148 MachineIRBuilder &B,
1149 GISelChangeObserver &Observer, Register &SrcReg) {
1150 Observer.changingInstr(MI);
1151 // Swap dst registers.
1152 Register Dst1 = MI.getOperand(0).getReg();
1153 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1154 MI.getOperand(1).setReg(Dst1);
1155 MI.getOperand(2).setReg(SrcReg);
1156 Observer.changedInstr(MI);
1157}
1158
1159// Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
1160// Match v2s64 mul instructions, which will then be scalarised later on
1161// Doing these two matches in one function to ensure that the order of matching
1162// will always be the same.
1163// Try lowering MUL to MULL before trying to scalarize if needed.
1164bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
1165 // Get the instructions that defined the source operand
1166 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1167 return DstTy == LLT::fixed_vector(2, 64);
1168}
1169
1170void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
1171 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1172 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1173 "Expected a G_MUL instruction");
1174
1175 // Get the instructions that defined the source operand
1176 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1177 assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
1178 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1179 Helper.fewerElementsVector(
1180 MI, 0,
1181 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)));
1182}
1183
1184class AArch64PostLegalizerLoweringImpl : public Combiner {
1185protected:
1186 const CombinerHelper Helper;
1187 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1188 const AArch64Subtarget &STI;
1189
1190public:
1191 AArch64PostLegalizerLoweringImpl(
1192 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1193 GISelCSEInfo *CSEInfo,
1194 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1195 const AArch64Subtarget &STI);
1196
1197 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1198
1199 bool tryCombineAll(MachineInstr &I) const override;
1200
1201private:
1202#define GET_GICOMBINER_CLASS_MEMBERS
1203#include "AArch64GenPostLegalizeGILowering.inc"
1204#undef GET_GICOMBINER_CLASS_MEMBERS
1205};
1206
1207#define GET_GICOMBINER_IMPL
1208#include "AArch64GenPostLegalizeGILowering.inc"
1209#undef GET_GICOMBINER_IMPL
1210
1211AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1212 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1213 GISelCSEInfo *CSEInfo,
1214 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1215 const AArch64Subtarget &STI)
1216 : Combiner(MF, CInfo, TPC, /*VT*/ nullptr, CSEInfo),
1217 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1218 STI(STI),
1219#define GET_GICOMBINER_CONSTRUCTOR_INITS
1220#include "AArch64GenPostLegalizeGILowering.inc"
1221#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1222{
1223}
1224
1225class AArch64PostLegalizerLowering : public MachineFunctionPass {
1226public:
1227 static char ID;
1228
1229 AArch64PostLegalizerLowering();
1230
1231 StringRef getPassName() const override {
1232 return "AArch64PostLegalizerLowering";
1233 }
1234
1235 bool runOnMachineFunction(MachineFunction &MF) override;
1236 void getAnalysisUsage(AnalysisUsage &AU) const override;
1237
1238private:
1239 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1240};
1241} // end anonymous namespace
1242
1243void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1244 AU.addRequired<TargetPassConfig>();
1245 AU.setPreservesCFG();
1246 getSelectionDAGFallbackAnalysisUsage(AU);
1247 MachineFunctionPass::getAnalysisUsage(AU);
1248}
1249
1250AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1251 : MachineFunctionPass(ID) {
1252 if (!RuleConfig.parseCommandLineOption())
1253 report_fatal_error("Invalid rule identifier");
1254}
1255
1256bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1257 if (MF.getProperties().hasFailedISel())
1258 return false;
1259 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
1260 auto *TPC = &getAnalysis<TargetPassConfig>();
1261 const Function &F = MF.getFunction();
1262
1263 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1264 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1265 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1266 F.hasOptSize(), F.hasMinSize());
1267 // Disable fixed-point iteration to reduce compile-time
1268 CInfo.MaxIterations = 1;
1269 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1270 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1271 CInfo.EnableFullDCE = false;
1272 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1273 RuleConfig, ST);
1274 return Impl.combineMachineInstrs();
1275}
1276
1277char AArch64PostLegalizerLowering::ID = 0;
1278INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1279 "Lower AArch64 MachineInstrs after legalization", false,
1280 false)
1281INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1282INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1283 "Lower AArch64 MachineInstrs after legalization", false,
1284 false)
1285
1286namespace llvm {
1287FunctionPass *createAArch64PostLegalizerLowering() {
1288 return new AArch64PostLegalizerLowering();
1289}
1290} // end namespace llvm