ARM.cpp
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/IR/InlineAsm.h"
19#include "llvm/IR/IntrinsicsAArch64.h"
20#include "llvm/IR/IntrinsicsARM.h"
21#include "llvm/IR/IntrinsicsBPF.h"
22#include "llvm/TargetParser/AArch64TargetParser.h"
23
24#include <numeric>
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
30static std::optional<CodeGenFunction::MSVCIntrin>
31translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
32 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
33 switch (BuiltinID) {
34 default:
35 return std::nullopt;
36 case clang::AArch64::BI_BitScanForward:
37 case clang::AArch64::BI_BitScanForward64:
38 return MSVCIntrin::_BitScanForward;
39 case clang::AArch64::BI_BitScanReverse:
40 case clang::AArch64::BI_BitScanReverse64:
41 return MSVCIntrin::_BitScanReverse;
42 case clang::AArch64::BI_InterlockedAnd64:
43 return MSVCIntrin::_InterlockedAnd;
44 case clang::AArch64::BI_InterlockedExchange64:
45 return MSVCIntrin::_InterlockedExchange;
46 case clang::AArch64::BI_InterlockedExchangeAdd64:
47 return MSVCIntrin::_InterlockedExchangeAdd;
48 case clang::AArch64::BI_InterlockedExchangeSub64:
49 return MSVCIntrin::_InterlockedExchangeSub;
50 case clang::AArch64::BI_InterlockedOr64:
51 return MSVCIntrin::_InterlockedOr;
52 case clang::AArch64::BI_InterlockedXor64:
53 return MSVCIntrin::_InterlockedXor;
54 case clang::AArch64::BI_InterlockedDecrement64:
55 return MSVCIntrin::_InterlockedDecrement;
56 case clang::AArch64::BI_InterlockedIncrement64:
57 return MSVCIntrin::_InterlockedIncrement;
58 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
59 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
60 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
62 return MSVCIntrin::_InterlockedExchangeAdd_acq;
63 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
64 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
65 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
67 return MSVCIntrin::_InterlockedExchangeAdd_rel;
68 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
69 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
70 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
72 return MSVCIntrin::_InterlockedExchangeAdd_nf;
73 case clang::AArch64::BI_InterlockedExchange8_acq:
74 case clang::AArch64::BI_InterlockedExchange16_acq:
75 case clang::AArch64::BI_InterlockedExchange_acq:
76 case clang::AArch64::BI_InterlockedExchange64_acq:
77 case clang::AArch64::BI_InterlockedExchangePointer_acq:
78 return MSVCIntrin::_InterlockedExchange_acq;
79 case clang::AArch64::BI_InterlockedExchange8_rel:
80 case clang::AArch64::BI_InterlockedExchange16_rel:
81 case clang::AArch64::BI_InterlockedExchange_rel:
82 case clang::AArch64::BI_InterlockedExchange64_rel:
83 case clang::AArch64::BI_InterlockedExchangePointer_rel:
84 return MSVCIntrin::_InterlockedExchange_rel;
85 case clang::AArch64::BI_InterlockedExchange8_nf:
86 case clang::AArch64::BI_InterlockedExchange16_nf:
87 case clang::AArch64::BI_InterlockedExchange_nf:
88 case clang::AArch64::BI_InterlockedExchange64_nf:
89 case clang::AArch64::BI_InterlockedExchangePointer_nf:
90 return MSVCIntrin::_InterlockedExchange_nf;
91 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
92 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
93 case clang::AArch64::BI_InterlockedCompareExchange_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
95 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
96 return MSVCIntrin::_InterlockedCompareExchange_acq;
97 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
98 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
99 case clang::AArch64::BI_InterlockedCompareExchange_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
101 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
102 return MSVCIntrin::_InterlockedCompareExchange_rel;
103 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
104 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
105 case clang::AArch64::BI_InterlockedCompareExchange_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
107 return MSVCIntrin::_InterlockedCompareExchange_nf;
108 case clang::AArch64::BI_InterlockedCompareExchange128:
109 return MSVCIntrin::_InterlockedCompareExchange128;
110 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
111 return MSVCIntrin::_InterlockedCompareExchange128_acq;
112 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
113 return MSVCIntrin::_InterlockedCompareExchange128_nf;
114 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
115 return MSVCIntrin::_InterlockedCompareExchange128_rel;
116 case clang::AArch64::BI_InterlockedOr8_acq:
117 case clang::AArch64::BI_InterlockedOr16_acq:
118 case clang::AArch64::BI_InterlockedOr_acq:
119 case clang::AArch64::BI_InterlockedOr64_acq:
120 return MSVCIntrin::_InterlockedOr_acq;
121 case clang::AArch64::BI_InterlockedOr8_rel:
122 case clang::AArch64::BI_InterlockedOr16_rel:
123 case clang::AArch64::BI_InterlockedOr_rel:
124 case clang::AArch64::BI_InterlockedOr64_rel:
125 return MSVCIntrin::_InterlockedOr_rel;
126 case clang::AArch64::BI_InterlockedOr8_nf:
127 case clang::AArch64::BI_InterlockedOr16_nf:
128 case clang::AArch64::BI_InterlockedOr_nf:
129 case clang::AArch64::BI_InterlockedOr64_nf:
130 return MSVCIntrin::_InterlockedOr_nf;
131 case clang::AArch64::BI_InterlockedXor8_acq:
132 case clang::AArch64::BI_InterlockedXor16_acq:
133 case clang::AArch64::BI_InterlockedXor_acq:
134 case clang::AArch64::BI_InterlockedXor64_acq:
135 return MSVCIntrin::_InterlockedXor_acq;
136 case clang::AArch64::BI_InterlockedXor8_rel:
137 case clang::AArch64::BI_InterlockedXor16_rel:
138 case clang::AArch64::BI_InterlockedXor_rel:
139 case clang::AArch64::BI_InterlockedXor64_rel:
140 return MSVCIntrin::_InterlockedXor_rel;
141 case clang::AArch64::BI_InterlockedXor8_nf:
142 case clang::AArch64::BI_InterlockedXor16_nf:
143 case clang::AArch64::BI_InterlockedXor_nf:
144 case clang::AArch64::BI_InterlockedXor64_nf:
145 return MSVCIntrin::_InterlockedXor_nf;
146 case clang::AArch64::BI_InterlockedAnd8_acq:
147 case clang::AArch64::BI_InterlockedAnd16_acq:
148 case clang::AArch64::BI_InterlockedAnd_acq:
149 case clang::AArch64::BI_InterlockedAnd64_acq:
150 return MSVCIntrin::_InterlockedAnd_acq;
151 case clang::AArch64::BI_InterlockedAnd8_rel:
152 case clang::AArch64::BI_InterlockedAnd16_rel:
153 case clang::AArch64::BI_InterlockedAnd_rel:
154 case clang::AArch64::BI_InterlockedAnd64_rel:
155 return MSVCIntrin::_InterlockedAnd_rel;
156 case clang::AArch64::BI_InterlockedAnd8_nf:
157 case clang::AArch64::BI_InterlockedAnd16_nf:
158 case clang::AArch64::BI_InterlockedAnd_nf:
159 case clang::AArch64::BI_InterlockedAnd64_nf:
160 return MSVCIntrin::_InterlockedAnd_nf;
161 case clang::AArch64::BI_InterlockedIncrement16_acq:
162 case clang::AArch64::BI_InterlockedIncrement_acq:
163 case clang::AArch64::BI_InterlockedIncrement64_acq:
164 return MSVCIntrin::_InterlockedIncrement_acq;
165 case clang::AArch64::BI_InterlockedIncrement16_rel:
166 case clang::AArch64::BI_InterlockedIncrement_rel:
167 case clang::AArch64::BI_InterlockedIncrement64_rel:
168 return MSVCIntrin::_InterlockedIncrement_rel;
169 case clang::AArch64::BI_InterlockedIncrement16_nf:
170 case clang::AArch64::BI_InterlockedIncrement_nf:
171 case clang::AArch64::BI_InterlockedIncrement64_nf:
172 return MSVCIntrin::_InterlockedIncrement_nf;
173 case clang::AArch64::BI_InterlockedDecrement16_acq:
174 case clang::AArch64::BI_InterlockedDecrement_acq:
175 case clang::AArch64::BI_InterlockedDecrement64_acq:
176 return MSVCIntrin::_InterlockedDecrement_acq;
177 case clang::AArch64::BI_InterlockedDecrement16_rel:
178 case clang::AArch64::BI_InterlockedDecrement_rel:
179 case clang::AArch64::BI_InterlockedDecrement64_rel:
180 return MSVCIntrin::_InterlockedDecrement_rel;
181 case clang::AArch64::BI_InterlockedDecrement16_nf:
182 case clang::AArch64::BI_InterlockedDecrement_nf:
183 case clang::AArch64::BI_InterlockedDecrement64_nf:
184 return MSVCIntrin::_InterlockedDecrement_nf;
185 }
186 llvm_unreachable("must return from switch");
187}
188
189static std::optional<CodeGenFunction::MSVCIntrin>
190translateArmToMsvcIntrin(unsigned BuiltinID) {
191 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
192 switch (BuiltinID) {
193 default:
194 return std::nullopt;
195 case clang::ARM::BI_BitScanForward:
196 case clang::ARM::BI_BitScanForward64:
197 return MSVCIntrin::_BitScanForward;
198 case clang::ARM::BI_BitScanReverse:
199 case clang::ARM::BI_BitScanReverse64:
200 return MSVCIntrin::_BitScanReverse;
201 case clang::ARM::BI_InterlockedAnd64:
202 return MSVCIntrin::_InterlockedAnd;
203 case clang::ARM::BI_InterlockedExchange64:
204 return MSVCIntrin::_InterlockedExchange;
205 case clang::ARM::BI_InterlockedExchangeAdd64:
206 return MSVCIntrin::_InterlockedExchangeAdd;
207 case clang::ARM::BI_InterlockedExchangeSub64:
208 return MSVCIntrin::_InterlockedExchangeSub;
209 case clang::ARM::BI_InterlockedOr64:
210 return MSVCIntrin::_InterlockedOr;
211 case clang::ARM::BI_InterlockedXor64:
212 return MSVCIntrin::_InterlockedXor;
213 case clang::ARM::BI_InterlockedDecrement64:
214 return MSVCIntrin::_InterlockedDecrement;
215 case clang::ARM::BI_InterlockedIncrement64:
216 return MSVCIntrin::_InterlockedIncrement;
217 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
218 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
219 case clang::ARM::BI_InterlockedExchangeAdd_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
221 return MSVCIntrin::_InterlockedExchangeAdd_acq;
222 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
223 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
224 case clang::ARM::BI_InterlockedExchangeAdd_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
226 return MSVCIntrin::_InterlockedExchangeAdd_rel;
227 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
228 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
229 case clang::ARM::BI_InterlockedExchangeAdd_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
231 return MSVCIntrin::_InterlockedExchangeAdd_nf;
232 case clang::ARM::BI_InterlockedExchange8_acq:
233 case clang::ARM::BI_InterlockedExchange16_acq:
234 case clang::ARM::BI_InterlockedExchange_acq:
235 case clang::ARM::BI_InterlockedExchange64_acq:
236 case clang::ARM::BI_InterlockedExchangePointer_acq:
237 return MSVCIntrin::_InterlockedExchange_acq;
238 case clang::ARM::BI_InterlockedExchange8_rel:
239 case clang::ARM::BI_InterlockedExchange16_rel:
240 case clang::ARM::BI_InterlockedExchange_rel:
241 case clang::ARM::BI_InterlockedExchange64_rel:
242 case clang::ARM::BI_InterlockedExchangePointer_rel:
243 return MSVCIntrin::_InterlockedExchange_rel;
244 case clang::ARM::BI_InterlockedExchange8_nf:
245 case clang::ARM::BI_InterlockedExchange16_nf:
246 case clang::ARM::BI_InterlockedExchange_nf:
247 case clang::ARM::BI_InterlockedExchange64_nf:
248 case clang::ARM::BI_InterlockedExchangePointer_nf:
249 return MSVCIntrin::_InterlockedExchange_nf;
250 case clang::ARM::BI_InterlockedCompareExchange8_acq:
251 case clang::ARM::BI_InterlockedCompareExchange16_acq:
252 case clang::ARM::BI_InterlockedCompareExchange_acq:
253 case clang::ARM::BI_InterlockedCompareExchange64_acq:
254 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
255 return MSVCIntrin::_InterlockedCompareExchange_acq;
256 case clang::ARM::BI_InterlockedCompareExchange8_rel:
257 case clang::ARM::BI_InterlockedCompareExchange16_rel:
258 case clang::ARM::BI_InterlockedCompareExchange_rel:
259 case clang::ARM::BI_InterlockedCompareExchange64_rel:
260 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
261 return MSVCIntrin::_InterlockedCompareExchange_rel;
262 case clang::ARM::BI_InterlockedCompareExchange8_nf:
263 case clang::ARM::BI_InterlockedCompareExchange16_nf:
264 case clang::ARM::BI_InterlockedCompareExchange_nf:
265 case clang::ARM::BI_InterlockedCompareExchange64_nf:
266 return MSVCIntrin::_InterlockedCompareExchange_nf;
267 case clang::ARM::BI_InterlockedOr8_acq:
268 case clang::ARM::BI_InterlockedOr16_acq:
269 case clang::ARM::BI_InterlockedOr_acq:
270 case clang::ARM::BI_InterlockedOr64_acq:
271 return MSVCIntrin::_InterlockedOr_acq;
272 case clang::ARM::BI_InterlockedOr8_rel:
273 case clang::ARM::BI_InterlockedOr16_rel:
274 case clang::ARM::BI_InterlockedOr_rel:
275 case clang::ARM::BI_InterlockedOr64_rel:
276 return MSVCIntrin::_InterlockedOr_rel;
277 case clang::ARM::BI_InterlockedOr8_nf:
278 case clang::ARM::BI_InterlockedOr16_nf:
279 case clang::ARM::BI_InterlockedOr_nf:
280 case clang::ARM::BI_InterlockedOr64_nf:
281 return MSVCIntrin::_InterlockedOr_nf;
282 case clang::ARM::BI_InterlockedXor8_acq:
283 case clang::ARM::BI_InterlockedXor16_acq:
284 case clang::ARM::BI_InterlockedXor_acq:
285 case clang::ARM::BI_InterlockedXor64_acq:
286 return MSVCIntrin::_InterlockedXor_acq;
287 case clang::ARM::BI_InterlockedXor8_rel:
288 case clang::ARM::BI_InterlockedXor16_rel:
289 case clang::ARM::BI_InterlockedXor_rel:
290 case clang::ARM::BI_InterlockedXor64_rel:
291 return MSVCIntrin::_InterlockedXor_rel;
292 case clang::ARM::BI_InterlockedXor8_nf:
293 case clang::ARM::BI_InterlockedXor16_nf:
294 case clang::ARM::BI_InterlockedXor_nf:
295 case clang::ARM::BI_InterlockedXor64_nf:
296 return MSVCIntrin::_InterlockedXor_nf;
297 case clang::ARM::BI_InterlockedAnd8_acq:
298 case clang::ARM::BI_InterlockedAnd16_acq:
299 case clang::ARM::BI_InterlockedAnd_acq:
300 case clang::ARM::BI_InterlockedAnd64_acq:
301 return MSVCIntrin::_InterlockedAnd_acq;
302 case clang::ARM::BI_InterlockedAnd8_rel:
303 case clang::ARM::BI_InterlockedAnd16_rel:
304 case clang::ARM::BI_InterlockedAnd_rel:
305 case clang::ARM::BI_InterlockedAnd64_rel:
306 return MSVCIntrin::_InterlockedAnd_rel;
307 case clang::ARM::BI_InterlockedAnd8_nf:
308 case clang::ARM::BI_InterlockedAnd16_nf:
309 case clang::ARM::BI_InterlockedAnd_nf:
310 case clang::ARM::BI_InterlockedAnd64_nf:
311 return MSVCIntrin::_InterlockedAnd_nf;
312 case clang::ARM::BI_InterlockedIncrement16_acq:
313 case clang::ARM::BI_InterlockedIncrement_acq:
314 case clang::ARM::BI_InterlockedIncrement64_acq:
315 return MSVCIntrin::_InterlockedIncrement_acq;
316 case clang::ARM::BI_InterlockedIncrement16_rel:
317 case clang::ARM::BI_InterlockedIncrement_rel:
318 case clang::ARM::BI_InterlockedIncrement64_rel:
319 return MSVCIntrin::_InterlockedIncrement_rel;
320 case clang::ARM::BI_InterlockedIncrement16_nf:
321 case clang::ARM::BI_InterlockedIncrement_nf:
322 case clang::ARM::BI_InterlockedIncrement64_nf:
323 return MSVCIntrin::_InterlockedIncrement_nf;
324 case clang::ARM::BI_InterlockedDecrement16_acq:
325 case clang::ARM::BI_InterlockedDecrement_acq:
326 case clang::ARM::BI_InterlockedDecrement64_acq:
327 return MSVCIntrin::_InterlockedDecrement_acq;
328 case clang::ARM::BI_InterlockedDecrement16_rel:
329 case clang::ARM::BI_InterlockedDecrement_rel:
330 case clang::ARM::BI_InterlockedDecrement64_rel:
331 return MSVCIntrin::_InterlockedDecrement_rel;
332 case clang::ARM::BI_InterlockedDecrement16_nf:
333 case clang::ARM::BI_InterlockedDecrement_nf:
334 case clang::ARM::BI_InterlockedDecrement64_nf:
335 return MSVCIntrin::_InterlockedDecrement_nf;
336 }
337 llvm_unreachable("must return from switch");
338}
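// Illustrative sketch (not part of ARM.cpp): both translate*ToMsvcIntrin helpers
// above are meant to be queried up front by the per-target builtin emitters.
// When the builtin belongs to the MSVC interlocked/bit-scan family, the call is
// forwarded to the shared MSVC emitter rather than being lowered target by
// target. The exact surrounding code in the emitters may differ from this
// fragment.
//
//   if (std::optional<CodeGenFunction::MSVCIntrin> MsvcIntId =
//           translateArmToMsvcIntrin(BuiltinID))
//     return EmitMSVCBuiltinExpr(*MsvcIntId, E);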
339
340// Emit an intrinsic where all operands are of the same type as the result.
341// Depending on mode, this may be a constrained floating-point intrinsic.
342static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
343 unsigned IntrinsicID,
344 unsigned ConstrainedIntrinsicID,
345 llvm::Type *Ty,
346 ArrayRef<Value *> Args) {
347 Function *F;
348 if (CGF.Builder.getIsFPConstrained())
349 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
350 else
351 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
352
353 if (CGF.Builder.getIsFPConstrained())
354 return CGF.Builder.CreateConstrainedFPCall(F, Args);
355 else
356 return CGF.Builder.CreateCall(F, Args);
357}
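// Illustrative sketch (not part of ARM.cpp): callers of the helper above supply
// both the regular and the strictfp-constrained intrinsic ID and let the helper
// choose based on the builder's FP-constrained state. The sqrt pairing below is
// only an example of such a pair, not a claim about any particular NEON builtin.
//
//   Value *Arg = EmitScalarExpr(E->getArg(0));
//   return emitCallMaybeConstrainedFPBuiltin(
//       *this, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt,
//       Arg->getType(), {Arg});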
358
359static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
360 NeonTypeFlags TypeFlags,
361 bool HasFastHalfType = true,
362 bool V1Ty = false,
363 bool AllowBFloatArgsAndRet = true) {
364 int IsQuad = TypeFlags.isQuad();
 365 switch (TypeFlags.getEltType()) {
 366 case NeonTypeFlags::Int8:
 367 case NeonTypeFlags::Poly8:
 368 case NeonTypeFlags::MFloat8:
 369 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
 370 case NeonTypeFlags::Int16:
 371 case NeonTypeFlags::Poly16:
 372 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
 373 case NeonTypeFlags::BFloat16:
 374 if (AllowBFloatArgsAndRet)
 375 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
 376 else
 377 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
 378 case NeonTypeFlags::Float16:
 379 if (HasFastHalfType)
 380 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
 381 else
 382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
 383 case NeonTypeFlags::Int32:
 384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
 385 case NeonTypeFlags::Int64:
 386 case NeonTypeFlags::Poly64:
 387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
 388 case NeonTypeFlags::Poly128:
 389 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
 390 // much of the i128/f128 API is still missing, so we represent poly128
 391 // as v16i8 and rely on pattern matching.
 392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
 393 case NeonTypeFlags::Float32:
 394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
 395 case NeonTypeFlags::Float64:
 396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
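// Standalone illustration (not part of ARM.cpp) of the lane-count arithmetic in
// GetNeonType above: each element type has a base lane count for a 64-bit D
// register, the quad bit doubles it for a 128-bit Q register, and V1Ty forces a
// single-element vector. The helper names below are ours, for illustration only.

#include <cassert>

static unsigned neonLaneCount(unsigned BaseLanes64Bit, bool IsQuad,
                              bool V1Ty = false) {
  // Mirrors the `V1Ty ? 1 : (Base << IsQuad)` pattern used in GetNeonType.
  return V1Ty ? 1u : (BaseLanes64Bit << (IsQuad ? 1 : 0));
}

static void neonLaneCountExamples() {
  assert(neonLaneCount(8, /*IsQuad=*/false) == 8);      // 8 x i8 in a D register
  assert(neonLaneCount(4, /*IsQuad=*/true) == 8);       // 8 x i16/f16 in a Q register
  assert(neonLaneCount(2, /*IsQuad=*/true) == 4);       // 4 x i32/f32 in a Q register
  assert(neonLaneCount(1, /*IsQuad=*/true, true) == 1); // V1Ty: always one lane
}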
400
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
 404 switch (IntTypeFlags.getEltType()) {
 405 case NeonTypeFlags::Int16:
 406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
 407 case NeonTypeFlags::Int32:
 408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
 409 case NeonTypeFlags::Int64:
 410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
426
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 else
445 return Builder.CreateCall(F, Ops, name);
446}
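// Illustrative sketch (not part of ARM.cpp): EmitNeonCall is the common exit
// path for table-driven NEON lowering. A typical caller looks up the overloaded
// intrinsic for the chosen vector types and hands over the already-collected
// operands, roughly as below (names here are generic placeholders, not a
// specific builtin's lowering).
//
//   llvm::Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
//   return EmitNeonCall(F, Ops, NameHint);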
447
448llvm::Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
449 ArrayRef<llvm::Type *> Tys,
450 SmallVectorImpl<llvm::Value *> &Ops,
451 const CallExpr *E, const char *name) {
452 llvm::Value *FPM =
453 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
454 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
455 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
456}
457
458llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
459 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
460 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
461
462 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
463 RetTy->getPrimitiveSizeInBits();
464 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
465 Ops[1]->getType()};
466 if (ExtendLaneArg) {
467 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
468 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
469 uint64_t(0));
470 }
471 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
472}
473
474llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
475 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
476 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
477
478 if (ExtendLaneArg) {
479 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
480 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
481 uint64_t(0));
482 }
483 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
484 RetTy->getPrimitiveSizeInBits();
485 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
486 Ops, E, name);
487}
488
489Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
490 bool neg) {
491 int SV = cast<ConstantInt>(V)->getSExtValue();
492 return ConstantInt::get(Ty, neg ? -SV : SV);
493}
494
495Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
496 llvm::Type *Ty1, bool Extract,
497 SmallVectorImpl<llvm::Value *> &Ops,
498 const CallExpr *E,
499 const char *name) {
500 llvm::Type *Tys[] = {Ty0, Ty1};
501 if (Extract) {
502 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
503 // the vector.
504 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
505 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
506 }
507 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
508}
509
510// Right-shift a vector by a constant.
511Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
512 llvm::Type *Ty, bool usgn,
513 const char *name) {
514 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
515
516 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
517 int EltSize = VTy->getScalarSizeInBits();
518
519 Vec = Builder.CreateBitCast(Vec, Ty);
520
521 // lshr/ashr are undefined when the shift amount is equal to the vector
522 // element size.
523 if (ShiftAmt == EltSize) {
524 if (usgn) {
525 // Right-shifting an unsigned value by its size yields 0.
526 return llvm::ConstantAggregateZero::get(VTy);
527 } else {
528 // Right-shifting a signed value by its size is equivalent
529 // to a shift of size-1.
530 --ShiftAmt;
531 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
532 }
533 }
534
535 Shift = EmitNeonShiftVector(Shift, Ty, false);
536 if (usgn)
537 return Builder.CreateLShr(Vec, Shift, name);
538 else
539 return Builder.CreateAShr(Vec, Shift, name);
540}
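// Standalone illustration (not part of ARM.cpp) of the clamping rule used in
// EmitNeonRShiftImm above: a right shift by the full element width is undefined
// in LLVM IR (and in C++), but for a signed value it is equivalent to shifting
// by width-1, which smears the sign bit across the element, while an unsigned
// value shifted by its width is simply zero. Helper names below are ours.

#include <cassert>
#include <cstdint>

static int32_t signedRightShiftClamped(int32_t V, unsigned Amt) {
  // Clamp a 32-bit arithmetic shift to 31, matching the --ShiftAmt adjustment.
  if (Amt >= 32)
    Amt = 31;
  return V >> Amt;
}

static void rightShiftExamples() {
  assert(signedRightShiftClamped(-8, 32) == -1); // sign bit smeared: all ones
  assert(signedRightShiftClamped(8, 32) == 0);   // non-negative input: zero
}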
541
542enum {
543 AddRetType = (1 << 0),
544 Add1ArgType = (1 << 1),
545 Add2ArgTypes = (1 << 2),
 546
 547 VectorizeRetType = (1 << 3),
 548 VectorizeArgTypes = (1 << 4),
 549
 550 InventFloatType = (1 << 5),
 551 UnsignedAlts = (1 << 6),
 552
 553 Use64BitVectors = (1 << 7),
 554 Use128BitVectors = (1 << 8),
 555
 556 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
 557 VectorRet = AddRetType | VectorizeRetType,
 558 VectorRetGetArgs01 =
 559 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
 560 FpCmpzModifiers =
 561 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
 562};
563
564namespace {
565struct ARMVectorIntrinsicInfo {
566 const char *NameHint;
567 unsigned BuiltinID;
568 unsigned LLVMIntrinsic;
569 unsigned AltLLVMIntrinsic;
 570 uint64_t TypeModifier;
 571
572 bool operator<(unsigned RHSBuiltinID) const {
573 return BuiltinID < RHSBuiltinID;
574 }
575 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
576 return BuiltinID < TE.BuiltinID;
577 }
578};
579} // end anonymous namespace
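// Illustrative sketch (not part of ARM.cpp): the operator< overloads above exist
// so the intrinsic tables below, which are kept sorted by BuiltinID, can be
// binary-searched. A lookup along these lines (the real file has its own helper
// with additional sortedness checking) would be:
//
//   static const ARMVectorIntrinsicInfo *
//   lookupIntrinsicInfo(llvm::ArrayRef<ARMVectorIntrinsicInfo> Map,
//                       unsigned BuiltinID) {
//     const ARMVectorIntrinsicInfo *It = llvm::lower_bound(Map, BuiltinID);
//     return (It != Map.end() && It->BuiltinID == BuiltinID) ? It : nullptr;
//   }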
580
581#define NEONMAP0(NameBase) \
582 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
583
584#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
585 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
586 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
587
588#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
589 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
590 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591 TypeModifier }
592
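// Illustrative expansion (not part of ARM.cpp): with the macros above, a table
// entry such as
//
//   NEONMAP1(vabs_v, arm_neon_vabs, 0)
//
// expands to the aggregate initializer
//
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
//
// i.e. NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic (unused here, 0) and
// the TypeModifier flags, while NEONMAP0 leaves all three trailing fields zero.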
593static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
594 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
595 NEONMAP0(splat_lane_v),
596 NEONMAP0(splat_laneq_v),
597 NEONMAP0(splatq_lane_v),
598 NEONMAP0(splatq_laneq_v),
599 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
600 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
601 NEONMAP1(vabs_v, arm_neon_vabs, 0),
602 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
603 NEONMAP0(vadd_v),
604 NEONMAP0(vaddhn_v),
605 NEONMAP0(vaddq_v),
606 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
607 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
608 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
609 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
610 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
611 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
612 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
613 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
614 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
615 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
616 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
617 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
618 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
619 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
620 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
621 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
622 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
623 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
624 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
625 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
626 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
627 NEONMAP1(vcage_v, arm_neon_vacge, 0),
628 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
629 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
630 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
631 NEONMAP1(vcale_v, arm_neon_vacge, 0),
632 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
633 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
634 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
635 NEONMAP0(vceqz_v),
636 NEONMAP0(vceqzq_v),
637 NEONMAP0(vcgez_v),
638 NEONMAP0(vcgezq_v),
639 NEONMAP0(vcgtz_v),
640 NEONMAP0(vcgtzq_v),
641 NEONMAP0(vclez_v),
642 NEONMAP0(vclezq_v),
643 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
644 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
645 NEONMAP0(vcltz_v),
646 NEONMAP0(vcltzq_v),
647 NEONMAP1(vclz_v, ctlz, Add1ArgType),
648 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
649 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
650 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
651 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
652 NEONMAP0(vcvt_f16_s16),
653 NEONMAP0(vcvt_f16_u16),
654 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
655 NEONMAP0(vcvt_f32_v),
656 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
657 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
658 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
659 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
660 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
661 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
662 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
663 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
664 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
665 NEONMAP0(vcvt_s16_f16),
666 NEONMAP0(vcvt_s32_v),
667 NEONMAP0(vcvt_s64_v),
668 NEONMAP0(vcvt_u16_f16),
669 NEONMAP0(vcvt_u32_v),
670 NEONMAP0(vcvt_u64_v),
671 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
672 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
673 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
674 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
675 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
676 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
677 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
678 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
681 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
684 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
685 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
686 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
687 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
688 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
689 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
690 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
691 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
694 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
697 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
698 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
699 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
700 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
701 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
702 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
703 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
706 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
709 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
710 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
711 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
712 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
713 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
714 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
715 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
718 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
720 NEONMAP0(vcvtq_f16_s16),
721 NEONMAP0(vcvtq_f16_u16),
722 NEONMAP0(vcvtq_f32_v),
723 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
724 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
725 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
726 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
727 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
728 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
729 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
730 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
731 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
732 NEONMAP0(vcvtq_s16_f16),
733 NEONMAP0(vcvtq_s32_v),
734 NEONMAP0(vcvtq_s64_v),
735 NEONMAP0(vcvtq_u16_f16),
736 NEONMAP0(vcvtq_u32_v),
737 NEONMAP0(vcvtq_u64_v),
738 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
739 NEONMAP1(vdot_u32, arm_neon_udot, 0),
740 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
741 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
742 NEONMAP0(vext_v),
743 NEONMAP0(vextq_v),
744 NEONMAP0(vfma_v),
745 NEONMAP0(vfmaq_v),
746 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
747 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
748 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
750 NEONMAP0(vld1_dup_v),
751 NEONMAP1(vld1_v, arm_neon_vld1, 0),
752 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
753 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
754 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
755 NEONMAP0(vld1q_dup_v),
756 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
757 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
758 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
759 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
760 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
761 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
762 NEONMAP1(vld2_v, arm_neon_vld2, 0),
763 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
764 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
765 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
766 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
767 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
768 NEONMAP1(vld3_v, arm_neon_vld3, 0),
769 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
770 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
771 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
772 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
773 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
774 NEONMAP1(vld4_v, arm_neon_vld4, 0),
775 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
776 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
777 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
778 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
779 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
780 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
781 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
782 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
783 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
784 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
785 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
786 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
787 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
788 NEONMAP0(vmovl_v),
789 NEONMAP0(vmovn_v),
790 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
791 NEONMAP0(vmull_v),
792 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
793 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
794 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
795 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
796 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
797 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
798 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
799 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
800 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
801 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
802 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
803 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
804 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
805 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
806 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
807 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
808 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
809 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
810 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
811 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
812 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
813 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
814 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
815 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
816 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
817 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
818 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
819 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
820 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
821 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
822 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
823 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
824 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
825 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
826 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
827 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
828 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
829 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
830 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
831 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
832 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
833 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
834 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
835 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
836 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
837 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
838 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
839 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
840 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
841 NEONMAP1(vrnd_v, trunc, Add1ArgType),
842 NEONMAP1(vrnda_v, round, Add1ArgType),
843 NEONMAP1(vrndaq_v, round, Add1ArgType),
844 NEONMAP0(vrndi_v),
845 NEONMAP0(vrndiq_v),
846 NEONMAP1(vrndm_v, floor, Add1ArgType),
847 NEONMAP1(vrndmq_v, floor, Add1ArgType),
848 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
849 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
850 NEONMAP1(vrndp_v, ceil, Add1ArgType),
851 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
852 NEONMAP1(vrndq_v, trunc, Add1ArgType),
853 NEONMAP1(vrndx_v, rint, Add1ArgType),
854 NEONMAP1(vrndxq_v, rint, Add1ArgType),
855 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
856 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
857 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
858 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
859 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
860 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
861 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
862 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
863 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
864 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
865 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
866 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
867 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
868 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
869 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
870 NEONMAP0(vshl_n_v),
871 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
872 NEONMAP0(vshll_n_v),
873 NEONMAP0(vshlq_n_v),
874 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
875 NEONMAP0(vshr_n_v),
876 NEONMAP0(vshrn_n_v),
877 NEONMAP0(vshrq_n_v),
878 NEONMAP1(vst1_v, arm_neon_vst1, 0),
879 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
880 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
881 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
882 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
883 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
884 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
885 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
886 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
887 NEONMAP1(vst2_v, arm_neon_vst2, 0),
888 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
889 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
890 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
891 NEONMAP1(vst3_v, arm_neon_vst3, 0),
892 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
893 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
894 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
895 NEONMAP1(vst4_v, arm_neon_vst4, 0),
896 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
897 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
898 NEONMAP0(vsubhn_v),
899 NEONMAP0(vtrn_v),
900 NEONMAP0(vtrnq_v),
901 NEONMAP0(vtst_v),
902 NEONMAP0(vtstq_v),
903 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
904 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
905 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
906 NEONMAP0(vuzp_v),
907 NEONMAP0(vuzpq_v),
908 NEONMAP0(vzip_v),
909 NEONMAP0(vzipq_v)
910};
911
912static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
913 NEONMAP0(splat_lane_v),
914 NEONMAP0(splat_laneq_v),
915 NEONMAP0(splatq_lane_v),
916 NEONMAP0(splatq_laneq_v),
917 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
918 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
919 NEONMAP0(vadd_v),
920 NEONMAP0(vaddhn_v),
921 NEONMAP0(vaddq_p128),
922 NEONMAP0(vaddq_v),
923 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
924 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
925 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
926 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
927 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
928 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
936 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
938 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
939 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
940 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
941 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
951 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
953 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
955 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
957 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
958 NEONMAP0(vceqz_v),
959 NEONMAP0(vceqzq_v),
960 NEONMAP0(vcgez_v),
961 NEONMAP0(vcgezq_v),
962 NEONMAP0(vcgtz_v),
963 NEONMAP0(vcgtzq_v),
964 NEONMAP0(vclez_v),
965 NEONMAP0(vclezq_v),
966 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
967 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP0(vcltz_v),
969 NEONMAP0(vcltzq_v),
970 NEONMAP1(vclz_v, ctlz, Add1ArgType),
971 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
972 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
973 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
981 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
993 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
994 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
995 NEONMAP0(vcvt_f16_s16),
996 NEONMAP0(vcvt_f16_u16),
997 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
998 NEONMAP0(vcvt_f32_v),
999 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1000 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1001 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1002 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1004 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1007 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP0(vcvtq_f16_s16),
1010 NEONMAP0(vcvtq_f16_u16),
1011 NEONMAP0(vcvtq_f32_v),
1012 NEONMAP0(vcvtq_high_bf16_f32),
1013 NEONMAP0(vcvtq_low_bf16_f32),
1014 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1015 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1016 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1017 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1019 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1022 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1025 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1026 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1027 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1028 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1029 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1030 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP0(vext_v),
1038 NEONMAP0(vextq_v),
1039 NEONMAP0(vfma_v),
1040 NEONMAP0(vfmaq_v),
1041 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1042 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1043 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1044 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1045 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1046 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1047 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1048 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1049 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1050 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1054 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1055 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1056 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1057 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1058 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1059 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1060 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1061 NEONMAP0(vmovl_v),
1062 NEONMAP0(vmovn_v),
1063 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1064 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1066 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1067 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1069 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1070 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1072 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1074 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1075 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1076 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1077 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1078 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1079 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1080 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1081 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1082 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1083 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1084 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1085 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1095 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1096 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1097 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1098 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1099 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1100 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1101 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1103 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1104 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
1105 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1106 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1107 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1109 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1111 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1112 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1113 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1115 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1117 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1119 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1123 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1127 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1131 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP0(vrndi_v),
1135 NEONMAP0(vrndiq_v),
1136 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1137 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1139 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1141 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1143 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1145 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1146 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1147 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1148 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1149 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1150 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1151 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1152 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1153 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1154 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1155 NEONMAP0(vshl_n_v),
1156 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1157 NEONMAP0(vshll_n_v),
1158 NEONMAP0(vshlq_n_v),
1159 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1160 NEONMAP0(vshr_n_v),
1161 NEONMAP0(vshrn_n_v),
1162 NEONMAP0(vshrq_n_v),
1163 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1164 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1165 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1166 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1167 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1168 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1169 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1170 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1171 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1172 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1173 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1174 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1175 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1176 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1177 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1178 NEONMAP0(vsubhn_v),
1179 NEONMAP0(vtst_v),
1180 NEONMAP0(vtstq_v),
1181 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1182 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1184 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1185};
1186
1187static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1188 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1189 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1191 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1192 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1198 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1199 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1200 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1201 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
1202 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1203 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1205 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1206 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1207 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1208 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1209 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1210 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1211 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1212 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1213 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1214 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1215 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1216 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1217 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1218 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1219 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1220 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1222 NEONMAP0(vcvth_bf16_f32),
1223 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1237 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1238 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1242 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1243 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1244 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1245 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1246 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1247 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1248 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1249 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1250 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1251 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1252 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1253 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1254 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1255 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1256 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1257 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1258 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1259 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1260 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1261 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1262 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1263 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1264 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1265 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1266 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1267 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1268 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1269 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1270 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1271 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1274 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1275 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1276 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1277 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1278 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1279 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1280 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1281 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1282 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1283 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1284 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1285 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1286 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1287 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1288 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1289 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1290 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1291 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1292 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1293 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1294 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1295 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1296 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1297 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1298 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1299 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1300 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1301 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1302 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1303 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1304 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1305 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1306 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1307 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1308 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1309 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1310 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1311 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1312 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1313 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1314 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1315 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1316 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1317 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1318 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1319 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1320 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1321 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1322 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1323 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1324 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1325 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1326 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1327 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1328 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1331 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1332 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1333 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1334 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1335 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1336 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1337 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1338 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1339 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1340 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1341 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1342 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1343 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1344 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1345 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1346 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1347 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1348 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1349 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1350 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1351 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1352 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1353 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1355 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1356 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1357 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1358 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1359 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1360 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1361 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1362 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1363 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1364 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1365 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1366 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1367 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1368 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1369 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1370 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1371 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1372 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1373 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1374 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1375 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1376 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1377 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1378 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1379 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1381 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1383 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1384 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1385 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1386 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1387 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
 1389 // FP16 scalar intrinsics go here.
1390 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1391 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1392 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1393 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1394 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1395 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1396 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1397 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1398 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1399 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1400 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1401 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1402 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1403 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1404 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1405 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1406 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1407 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1408 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1409 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1410 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1411 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1412 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1413 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1414 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1415 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1416 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1417 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1418 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1419 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1420 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1421 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1422 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1423 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1424};
1425
 1426// Some intrinsics are equivalent for codegen (an illustrative lookup sketch follows this table).
1427static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1428 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1429 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1430 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1431 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1432 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1433 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1434 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1435 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1436 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1437 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1438 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1439 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1440 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1441 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1442 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1443 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1444 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1445 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1446 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1447 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1448 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1449 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1450 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1451 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1452 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1453 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1454 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1455 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1456 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1457 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1458 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1459 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1460 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1461 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1462 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1463 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1464 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1465 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1466 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1467 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1468 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1469 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1470 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1471 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1472 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1473 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1474 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1475 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1476 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1477 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1478 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1479 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1480 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1481 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1482 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1483 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1484 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1485 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1486 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1487 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1488 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1489 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1490 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1491 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1492 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1493 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1494 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1495 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1496 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1497 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1498 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1499 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1500 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1501 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1502 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1503 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1504 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1505 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1506 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1507 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1508 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1509 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1510 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1511 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1512 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1513 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1514 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1515 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1516 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1517 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1518 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1519 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1520 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1521 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1522 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1523 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1524 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1525 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1526 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1527 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1528 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1529 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1530 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1531 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1532 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1533 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1534 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1535 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1536 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1537 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1538 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1539 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1540 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1541 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1542 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1543 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1544 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1545 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1546 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1547 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1548 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1549 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1550 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1551 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1552 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1553 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1554 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1555 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
 1556 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
 1557 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
 1558 // arbitrary one to be handled as the canonical variation.
1559 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1560 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1561 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1562 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1563 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1564 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1565 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1566 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1567 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1568 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1569 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1570 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1571};
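Aside: the table above is consulted before the intrinsic tables are searched, so an equivalent builtin is simply remapped to its canonical ID and then flows through the normal paths. A minimal sketch of that remapping (illustrative only, not part of ARM.cpp itself; it assumes a local BuiltinID as in the emitters later in this file):

  // Remap an equivalent builtin to the canonical ID it shares codegen with.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != std::end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;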
1572
1573#undef NEONMAP0
1574#undef NEONMAP1
1575#undef NEONMAP2
1576
1577#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1578 { \
1579 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1580 TypeModifier \
1581 }
1582
1583#define SVEMAP2(NameBase, TypeModifier) \
1584 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
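For illustration (not part of ARM.cpp; the NameBase values below are hypothetical, the real entries are generated into arm_sve_builtin_cg.inc), these macros expand as follows:

  // SVEMAP1(svfoo_m, aarch64_sve_foo, SomeModifier) expands to
  //   { "svfoo_m", SVE::BI__builtin_sve_svfoo_m, Intrinsic::aarch64_sve_foo, 0, SomeModifier }
  // SVEMAP2(svbar, SomeModifier) expands to
  //   { "svbar", SVE::BI__builtin_sve_svbar, 0, 0, SomeModifier }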
1585static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1586#define GET_SVE_LLVM_INTRINSIC_MAP
1587#include "clang/Basic/arm_sve_builtin_cg.inc"
1588#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1589#undef GET_SVE_LLVM_INTRINSIC_MAP
1590};
1591
1592#undef SVEMAP1
1593#undef SVEMAP2
1594
1595#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1596 { \
1597 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1598 TypeModifier \
1599 }
1600
1601#define SMEMAP2(NameBase, TypeModifier) \
1602 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1603static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1604#define GET_SME_LLVM_INTRINSIC_MAP
1605#include "clang/Basic/arm_sme_builtin_cg.inc"
1606#undef GET_SME_LLVM_INTRINSIC_MAP
1607};
1608
1609#undef SMEMAP1
1610#undef SMEMAP2
 1611
 1612static bool NEONSIMDIntrinsicsProvenSorted = false;
 1613
 1614static bool AArch64SIMDIntrinsicsProvenSorted = false;
 1615static bool AArch64SISDIntrinsicsProvenSorted = false;
 1616static bool AArch64SVEIntrinsicsProvenSorted = false;
 1617static bool AArch64SMEIntrinsicsProvenSorted = false;
 1618
 1619static const ARMVectorIntrinsicInfo *
 1620findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
 1621 unsigned BuiltinID, bool &MapProvenSorted) {
1622
1623#ifndef NDEBUG
1624 if (!MapProvenSorted) {
1625 assert(llvm::is_sorted(IntrinsicMap));
1626 MapProvenSorted = true;
1627 }
1628#endif
1629
1630 const ARMVectorIntrinsicInfo *Builtin =
1631 llvm::lower_bound(IntrinsicMap, BuiltinID);
1632
1633 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1634 return Builtin;
1635
1636 return nullptr;
1637}
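A typical call site looks roughly like the sketch below (illustrative, not part of ARM.cpp; it assumes the AArch64 SIMD table defined earlier in this file and the ProvenSorted flags above):

  // Look the builtin up in the sorted table; a null result means the builtin
  // is not table-driven and needs dedicated handling instead.
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      AArch64SIMDIntrinsicMap, BuiltinID, AArch64SIMDIntrinsicsProvenSorted);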
1638
 1639Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
 1640 unsigned Modifier,
1641 llvm::Type *ArgType,
1642 const CallExpr *E) {
1643 int VectorSize = 0;
1644 if (Modifier & Use64BitVectors)
1645 VectorSize = 64;
1646 else if (Modifier & Use128BitVectors)
1647 VectorSize = 128;
1648
1649 // Return type.
 1650 SmallVector<llvm::Type *, 3> Tys;
 1651 if (Modifier & AddRetType) {
1652 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1653 if (Modifier & VectorizeRetType)
1654 Ty = llvm::FixedVectorType::get(
1655 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1656
1657 Tys.push_back(Ty);
1658 }
1659
1660 // Arguments.
1661 if (Modifier & VectorizeArgTypes) {
1662 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1663 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1664 }
1665
1666 if (Modifier & (Add1ArgType | Add2ArgTypes))
1667 Tys.push_back(ArgType);
1668
1669 if (Modifier & Add2ArgTypes)
1670 Tys.push_back(ArgType);
1671
1672 if (Modifier & InventFloatType)
1673 Tys.push_back(FloatTy);
1674
1675 return CGM.getIntrinsic(IntrinsicID, Tys);
1676}
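Worked example (illustrative, not part of ARM.cpp): for vqaddh_s16 the table entry is Vectorize1ArgType | Use64BitVectors, and Vectorize1ArgType is the composite Add1ArgType | VectorizeArgTypes defined earlier in this file. VectorSize is therefore 64, the scalar i16 argument type is widened to <4 x i16>, Tys becomes { <4 x i16> }, and the overload resolves to llvm.aarch64.neon.sqadd.v4i16. Assuming CGM and an LLVMContext Ctx are in scope, the lookup it performs amounts to roughly:

  llvm::Type *I16Ty = llvm::Type::getInt16Ty(Ctx);            // scalar argument type
  llvm::Type *V4I16Ty = llvm::FixedVectorType::get(I16Ty, 4); // 64-bit vector overload
  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_sqadd, {V4I16Ty});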
1677
 1678static Value *EmitCommonNeonSISDBuiltinExpr(
 1679 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1680 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1681 unsigned BuiltinID = SISDInfo.BuiltinID;
1682 unsigned int Int = SISDInfo.LLVMIntrinsic;
1683 unsigned Modifier = SISDInfo.TypeModifier;
1684 const char *s = SISDInfo.NameHint;
1685
1686 switch (BuiltinID) {
1687 case NEON::BI__builtin_neon_vcled_s64:
1688 case NEON::BI__builtin_neon_vcled_u64:
1689 case NEON::BI__builtin_neon_vcles_f32:
1690 case NEON::BI__builtin_neon_vcled_f64:
1691 case NEON::BI__builtin_neon_vcltd_s64:
1692 case NEON::BI__builtin_neon_vcltd_u64:
1693 case NEON::BI__builtin_neon_vclts_f32:
1694 case NEON::BI__builtin_neon_vcltd_f64:
1695 case NEON::BI__builtin_neon_vcales_f32:
1696 case NEON::BI__builtin_neon_vcaled_f64:
1697 case NEON::BI__builtin_neon_vcalts_f32:
1698 case NEON::BI__builtin_neon_vcaltd_f64:
 1699 // Only one direction of comparisons actually exists; cmle is actually a cmge
 1700 // with swapped operands. The table gives us the right intrinsic, but we
 1701 // still need to do the swap.
1702 std::swap(Ops[0], Ops[1]);
1703 break;
1704 }
1705
1706 assert(Int && "Generic code assumes a valid intrinsic");
1707
1708 // Determine the type(s) of this overloaded AArch64 intrinsic.
1709 const Expr *Arg = E->getArg(0);
1710 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
1711 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
1712
1713 int j = 0;
1714 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1715 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1716 ai != ae; ++ai, ++j) {
1717 llvm::Type *ArgTy = ai->getType();
1718 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1719 ArgTy->getPrimitiveSizeInBits())
1720 continue;
1721
1722 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1723 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1724 // it before inserting.
1725 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1726 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1727 Ops[j] =
1728 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1729 }
1730
1731 Value *Result = CGF.EmitNeonCall(F, Ops, s);
1732 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1733 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1734 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1735 return CGF.Builder.CreateExtractElement(Result, C0);
1736
1737 return CGF.Builder.CreateBitCast(Result, ResultType, s);
1738}
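Continuing the vqaddh_s16 example (illustrative IR shape, not emitted verbatim): the scalar operands are inserted into poison vectors, the vector intrinsic is called, and lane 0 is extracted because the scalar result is narrower than the vector result:

  //   %a.vec = insertelement <4 x i16> poison, i16 %a, i64 0
  //   %b.vec = insertelement <4 x i16> poison, i16 %b, i64 0
  //   %r.vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a.vec, <4 x i16> %b.vec)
  //   %r     = extractelement <4 x i16> %r.vec, i64 0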
1739
 1740Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
 1741 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1742 const char *NameHint, unsigned Modifier, const CallExpr *E,
1743 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1744 llvm::Triple::ArchType Arch) {
1745 // Get the last argument, which specifies the vector type.
1746 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1747 std::optional<llvm::APSInt> NeonTypeConst =
 1748 Arg->getIntegerConstantExpr(getContext());
 1749 if (!NeonTypeConst)
1750 return nullptr;
1751
1752 // Determine the type of this overloaded NEON intrinsic.
1753 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1754 const bool Usgn = Type.isUnsigned();
1755 const bool Quad = Type.isQuad();
1756 const bool Floating = Type.isFloatingPoint();
1757 const bool HasFastHalfType = getTarget().hasFastHalfType();
1758 const bool AllowBFloatArgsAndRet =
1759 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1760
1761 llvm::FixedVectorType *VTy =
1762 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1763 llvm::Type *Ty = VTy;
1764 if (!Ty)
1765 return nullptr;
1766
1767 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1768 return Builder.getInt32(addr.getAlignment().getQuantity());
1769 };
1770
1771 unsigned Int = LLVMIntrinsic;
1772 if ((Modifier & UnsignedAlts) && !Usgn)
1773 Int = AltLLVMIntrinsic;
1774
1775 switch (BuiltinID) {
1776 default: break;
1777 case NEON::BI__builtin_neon_splat_lane_v:
1778 case NEON::BI__builtin_neon_splat_laneq_v:
1779 case NEON::BI__builtin_neon_splatq_lane_v:
1780 case NEON::BI__builtin_neon_splatq_laneq_v: {
1781 auto NumElements = VTy->getElementCount();
1782 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1783 NumElements = NumElements * 2;
1784 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1785 NumElements = NumElements.divideCoefficientBy(2);
1786
1787 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1788 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1789 }
1790 case NEON::BI__builtin_neon_vpadd_v:
1791 case NEON::BI__builtin_neon_vpaddq_v:
1792 // We don't allow fp/int overloading of intrinsics.
1793 if (VTy->getElementType()->isFloatingPointTy() &&
1794 Int == Intrinsic::aarch64_neon_addp)
1795 Int = Intrinsic::aarch64_neon_faddp;
1796 break;
1797 case NEON::BI__builtin_neon_vabs_v:
1798 case NEON::BI__builtin_neon_vabsq_v:
1799 if (VTy->getElementType()->isFloatingPointTy())
1800 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1801 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1802 case NEON::BI__builtin_neon_vadd_v:
1803 case NEON::BI__builtin_neon_vaddq_v: {
1804 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1805 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1806 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1807 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1808 return Builder.CreateBitCast(Ops[0], Ty);
1809 }
1810 case NEON::BI__builtin_neon_vaddhn_v: {
1811 llvm::FixedVectorType *SrcTy =
1812 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1813
1814 // %sum = add <4 x i32> %lhs, %rhs
1815 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1816 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1817 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1818
1819 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1820 Constant *ShiftAmt =
1821 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1822 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1823
1824 // %res = trunc <4 x i32> %high to <4 x i16>
1825 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1826 }
1827 case NEON::BI__builtin_neon_vcale_v:
1828 case NEON::BI__builtin_neon_vcaleq_v:
1829 case NEON::BI__builtin_neon_vcalt_v:
1830 case NEON::BI__builtin_neon_vcaltq_v:
1831 std::swap(Ops[0], Ops[1]);
1832 [[fallthrough]];
1833 case NEON::BI__builtin_neon_vcage_v:
1834 case NEON::BI__builtin_neon_vcageq_v:
1835 case NEON::BI__builtin_neon_vcagt_v:
1836 case NEON::BI__builtin_neon_vcagtq_v: {
1837 llvm::Type *Ty;
1838 switch (VTy->getScalarSizeInBits()) {
1839 default: llvm_unreachable("unexpected type");
1840 case 32:
1841 Ty = FloatTy;
1842 break;
1843 case 64:
1844 Ty = DoubleTy;
1845 break;
1846 case 16:
1847 Ty = HalfTy;
1848 break;
1849 }
1850 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1851 llvm::Type *Tys[] = { VTy, VecFlt };
1852 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1853 return EmitNeonCall(F, Ops, NameHint);
1854 }
1855 case NEON::BI__builtin_neon_vceqz_v:
1856 case NEON::BI__builtin_neon_vceqzq_v:
 1857 return EmitAArch64CompareBuiltinExpr(
 1858 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1859 case NEON::BI__builtin_neon_vcgez_v:
1860 case NEON::BI__builtin_neon_vcgezq_v:
 1861 return EmitAArch64CompareBuiltinExpr(
 1862 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1863 "vcgez");
1864 case NEON::BI__builtin_neon_vclez_v:
1865 case NEON::BI__builtin_neon_vclezq_v:
 1866 return EmitAArch64CompareBuiltinExpr(
 1867 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1868 "vclez");
1869 case NEON::BI__builtin_neon_vcgtz_v:
1870 case NEON::BI__builtin_neon_vcgtzq_v:
 1871 return EmitAArch64CompareBuiltinExpr(
 1872 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1873 "vcgtz");
1874 case NEON::BI__builtin_neon_vcltz_v:
1875 case NEON::BI__builtin_neon_vcltzq_v:
 1876 return EmitAArch64CompareBuiltinExpr(
 1877 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1878 "vcltz");
1879 case NEON::BI__builtin_neon_vclz_v:
1880 case NEON::BI__builtin_neon_vclzq_v:
 1881 // We generate a target-independent intrinsic, which needs a second argument
1882 // for whether or not clz of zero is undefined; on ARM it isn't.
1883 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1884 break;
1885 case NEON::BI__builtin_neon_vcvt_f32_v:
1886 case NEON::BI__builtin_neon_vcvtq_f32_v:
1887 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1888 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1889 HasFastHalfType);
1890 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1891 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1892 case NEON::BI__builtin_neon_vcvt_f16_s16:
1893 case NEON::BI__builtin_neon_vcvt_f16_u16:
1894 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1895 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1896 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1897 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1898 HasFastHalfType);
1899 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1900 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1901 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1902 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1903 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1904 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1905 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1906 Function *F = CGM.getIntrinsic(Int, Tys);
1907 return EmitNeonCall(F, Ops, "vcvt_n");
1908 }
1909 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1910 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1911 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1912 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1913 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1914 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1915 Function *F = CGM.getIntrinsic(Int, Tys);
1916 return EmitNeonCall(F, Ops, "vcvt_n");
1917 }
1918 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1919 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1920 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1921 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1922 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1923 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1924 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1925 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1926 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1927 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1928 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1929 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1930 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1931 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1932 return EmitNeonCall(F, Ops, "vcvt_n");
1933 }
1934 case NEON::BI__builtin_neon_vcvt_s32_v:
1935 case NEON::BI__builtin_neon_vcvt_u32_v:
1936 case NEON::BI__builtin_neon_vcvt_s64_v:
1937 case NEON::BI__builtin_neon_vcvt_u64_v:
1938 case NEON::BI__builtin_neon_vcvt_s16_f16:
1939 case NEON::BI__builtin_neon_vcvt_u16_f16:
1940 case NEON::BI__builtin_neon_vcvtq_s32_v:
1941 case NEON::BI__builtin_neon_vcvtq_u32_v:
1942 case NEON::BI__builtin_neon_vcvtq_s64_v:
1943 case NEON::BI__builtin_neon_vcvtq_u64_v:
1944 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1945 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1946 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1947 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1948 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1949 }
1950 case NEON::BI__builtin_neon_vcvta_s16_f16:
1951 case NEON::BI__builtin_neon_vcvta_s32_v:
1952 case NEON::BI__builtin_neon_vcvta_s64_v:
1953 case NEON::BI__builtin_neon_vcvta_u16_f16:
1954 case NEON::BI__builtin_neon_vcvta_u32_v:
1955 case NEON::BI__builtin_neon_vcvta_u64_v:
1956 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1957 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1958 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1959 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1960 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1961 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1962 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1963 case NEON::BI__builtin_neon_vcvtn_s32_v:
1964 case NEON::BI__builtin_neon_vcvtn_s64_v:
1965 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1966 case NEON::BI__builtin_neon_vcvtn_u32_v:
1967 case NEON::BI__builtin_neon_vcvtn_u64_v:
1968 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1969 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1970 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1971 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1972 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1973 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1974 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1975 case NEON::BI__builtin_neon_vcvtp_s32_v:
1976 case NEON::BI__builtin_neon_vcvtp_s64_v:
1977 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1978 case NEON::BI__builtin_neon_vcvtp_u32_v:
1979 case NEON::BI__builtin_neon_vcvtp_u64_v:
1980 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1981 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1982 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1983 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1984 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1985 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1986 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1987 case NEON::BI__builtin_neon_vcvtm_s32_v:
1988 case NEON::BI__builtin_neon_vcvtm_s64_v:
1989 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1990 case NEON::BI__builtin_neon_vcvtm_u32_v:
1991 case NEON::BI__builtin_neon_vcvtm_u64_v:
1992 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1993 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1994 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1995 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1996 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1997 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1998 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1999 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2000 }
2001 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2002 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2003 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2004
2005 }
2006 case NEON::BI__builtin_neon_vext_v:
2007 case NEON::BI__builtin_neon_vextq_v: {
2008 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2009 SmallVector<int, 16> Indices;
2010 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2011 Indices.push_back(i+CV);
2012
2013 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2014 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2015 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
2016 }
2017 case NEON::BI__builtin_neon_vfma_v:
2018 case NEON::BI__builtin_neon_vfmaq_v: {
2019 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2020 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2021 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2022
2023 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
 2024 return emitCallMaybeConstrainedFPBuiltin(
 2025 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2026 {Ops[1], Ops[2], Ops[0]});
2027 }
2028 case NEON::BI__builtin_neon_vld1_v:
2029 case NEON::BI__builtin_neon_vld1q_v: {
2030 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2031 Ops.push_back(getAlignmentValue32(PtrOp0));
2032 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
2033 }
2034 case NEON::BI__builtin_neon_vld1_x2_v:
2035 case NEON::BI__builtin_neon_vld1q_x2_v:
2036 case NEON::BI__builtin_neon_vld1_x3_v:
2037 case NEON::BI__builtin_neon_vld1q_x3_v:
2038 case NEON::BI__builtin_neon_vld1_x4_v:
2039 case NEON::BI__builtin_neon_vld1q_x4_v: {
2040 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2041 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2042 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2043 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2044 }
2045 case NEON::BI__builtin_neon_vld2_v:
2046 case NEON::BI__builtin_neon_vld2q_v:
2047 case NEON::BI__builtin_neon_vld3_v:
2048 case NEON::BI__builtin_neon_vld3q_v:
2049 case NEON::BI__builtin_neon_vld4_v:
2050 case NEON::BI__builtin_neon_vld4q_v:
2051 case NEON::BI__builtin_neon_vld2_dup_v:
2052 case NEON::BI__builtin_neon_vld2q_dup_v:
2053 case NEON::BI__builtin_neon_vld3_dup_v:
2054 case NEON::BI__builtin_neon_vld3q_dup_v:
2055 case NEON::BI__builtin_neon_vld4_dup_v:
2056 case NEON::BI__builtin_neon_vld4q_dup_v: {
2057 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2058 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2059 Value *Align = getAlignmentValue32(PtrOp1);
2060 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2061 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2062 }
2063 case NEON::BI__builtin_neon_vld1_dup_v:
2064 case NEON::BI__builtin_neon_vld1q_dup_v: {
2065 Value *V = PoisonValue::get(Ty);
2066 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2067 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
2068 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2069 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2070 return EmitNeonSplat(Ops[0], CI);
2071 }
2072 case NEON::BI__builtin_neon_vld2_lane_v:
2073 case NEON::BI__builtin_neon_vld2q_lane_v:
2074 case NEON::BI__builtin_neon_vld3_lane_v:
2075 case NEON::BI__builtin_neon_vld3q_lane_v:
2076 case NEON::BI__builtin_neon_vld4_lane_v:
2077 case NEON::BI__builtin_neon_vld4q_lane_v: {
2078 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2079 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2080 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2081 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2082 Ops.push_back(getAlignmentValue32(PtrOp1));
2083 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
2084 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2085 }
2086 case NEON::BI__builtin_neon_vmovl_v: {
2087 llvm::FixedVectorType *DTy =
2088 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2089 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2090 if (Usgn)
2091 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2092 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2093 }
2094 case NEON::BI__builtin_neon_vmovn_v: {
2095 llvm::FixedVectorType *QTy =
2096 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2097 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2098 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2099 }
2100 case NEON::BI__builtin_neon_vmull_v:
2101 // FIXME: the integer vmull operations could be emitted in terms of pure
2102 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2103 // hoisting the exts outside loops. Until global ISel comes along that can
 2104 // see through such movement, this leads to bad CodeGen. So we need an
2105 // intrinsic for now.
2106 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2107 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2108 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
2109 case NEON::BI__builtin_neon_vpadal_v:
2110 case NEON::BI__builtin_neon_vpadalq_v: {
2111 // The source operand type has twice as many elements of half the size.
2112 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2113 llvm::Type *EltTy =
2114 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2115 auto *NarrowTy =
2116 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2117 llvm::Type *Tys[2] = { Ty, NarrowTy };
2118 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2119 }
2120 case NEON::BI__builtin_neon_vpaddl_v:
2121 case NEON::BI__builtin_neon_vpaddlq_v: {
2122 // The source operand type has twice as many elements of half the size.
2123 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2124 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2125 auto *NarrowTy =
2126 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2127 llvm::Type *Tys[2] = { Ty, NarrowTy };
2128 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2129 }
2130 case NEON::BI__builtin_neon_vqdmlal_v:
2131 case NEON::BI__builtin_neon_vqdmlsl_v: {
2132 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2133 Ops[1] =
2134 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
2135 Ops.resize(2);
2136 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2137 }
2138 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2139 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2140 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2141 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2142 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2143 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2144 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2145 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2146 RTy->getNumElements() * 2);
2147 llvm::Type *Tys[2] = {
2148 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2149 /*isQuad*/ false))};
2150 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2151 }
2152 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2153 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2154 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2155 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2156 llvm::Type *Tys[2] = {
2157 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2158 /*isQuad*/ true))};
2159 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2160 }
2161 case NEON::BI__builtin_neon_vqshl_n_v:
2162 case NEON::BI__builtin_neon_vqshlq_n_v:
2163 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2164 1, false);
2165 case NEON::BI__builtin_neon_vqshlu_n_v:
2166 case NEON::BI__builtin_neon_vqshluq_n_v:
2167 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
2168 1, false);
2169 case NEON::BI__builtin_neon_vrecpe_v:
2170 case NEON::BI__builtin_neon_vrecpeq_v:
2171 case NEON::BI__builtin_neon_vrsqrte_v:
2172 case NEON::BI__builtin_neon_vrsqrteq_v:
2173 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2175 case NEON::BI__builtin_neon_vrndi_v:
2176 case NEON::BI__builtin_neon_vrndiq_v:
2177 Int = Builder.getIsFPConstrained()
2178 ? Intrinsic::experimental_constrained_nearbyint
2179 : Intrinsic::nearbyint;
2180 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2181 case NEON::BI__builtin_neon_vrshr_n_v:
2182 case NEON::BI__builtin_neon_vrshrq_n_v:
2183 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
2184 1, true);
2185 case NEON::BI__builtin_neon_vsha512hq_u64:
2186 case NEON::BI__builtin_neon_vsha512h2q_u64:
2187 case NEON::BI__builtin_neon_vsha512su0q_u64:
2188 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2189 Function *F = CGM.getIntrinsic(Int);
2190 return EmitNeonCall(F, Ops, "");
2191 }
2192 case NEON::BI__builtin_neon_vshl_n_v:
2193 case NEON::BI__builtin_neon_vshlq_n_v:
2194 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2195 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2196 "vshl_n");
2197 case NEON::BI__builtin_neon_vshll_n_v: {
2198 llvm::FixedVectorType *SrcTy =
2199 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2200 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2201 if (Usgn)
2202 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2203 else
2204 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2205 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2206 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2207 }
2208 case NEON::BI__builtin_neon_vshrn_n_v: {
2209 llvm::FixedVectorType *SrcTy =
2210 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2211 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2212 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2213 if (Usgn)
2214 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2215 else
2216 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2217 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2218 }
2219 case NEON::BI__builtin_neon_vshr_n_v:
2220 case NEON::BI__builtin_neon_vshrq_n_v:
2221 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
2222 case NEON::BI__builtin_neon_vst1_v:
2223 case NEON::BI__builtin_neon_vst1q_v:
2224 case NEON::BI__builtin_neon_vst2_v:
2225 case NEON::BI__builtin_neon_vst2q_v:
2226 case NEON::BI__builtin_neon_vst3_v:
2227 case NEON::BI__builtin_neon_vst3q_v:
2228 case NEON::BI__builtin_neon_vst4_v:
2229 case NEON::BI__builtin_neon_vst4q_v:
2230 case NEON::BI__builtin_neon_vst2_lane_v:
2231 case NEON::BI__builtin_neon_vst2q_lane_v:
2232 case NEON::BI__builtin_neon_vst3_lane_v:
2233 case NEON::BI__builtin_neon_vst3q_lane_v:
2234 case NEON::BI__builtin_neon_vst4_lane_v:
2235 case NEON::BI__builtin_neon_vst4q_lane_v: {
2236 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2237 Ops.push_back(getAlignmentValue32(PtrOp0));
2238 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
2239 }
2240 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2241 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2242 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2243 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2244 case NEON::BI__builtin_neon_vsm4eq_u32: {
2245 Function *F = CGM.getIntrinsic(Int);
2246 return EmitNeonCall(F, Ops, "");
2247 }
2248 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2249 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2250 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2251 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2252 Function *F = CGM.getIntrinsic(Int);
2253 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
2254 return EmitNeonCall(F, Ops, "");
2255 }
2256 case NEON::BI__builtin_neon_vst1_x2_v:
2257 case NEON::BI__builtin_neon_vst1q_x2_v:
2258 case NEON::BI__builtin_neon_vst1_x3_v:
2259 case NEON::BI__builtin_neon_vst1q_x3_v:
2260 case NEON::BI__builtin_neon_vst1_x4_v:
2261 case NEON::BI__builtin_neon_vst1q_x4_v: {
2262 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
 2263 // in AArch64 it comes last. We may want to stick to one or the other.
2264 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2265 Arch == llvm::Triple::aarch64_32) {
2266 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2267 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2268 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2269 }
2270 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
2271 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2272 }
2273 case NEON::BI__builtin_neon_vsubhn_v: {
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2276
2277 // %sum = add <4 x i32> %lhs, %rhs
2278 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2279 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2280 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2281
2282 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2283 Constant *ShiftAmt =
2284 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2285 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2286
2287 // %res = trunc <4 x i32> %high to <4 x i16>
2288 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2289 }
2290 case NEON::BI__builtin_neon_vtrn_v:
2291 case NEON::BI__builtin_neon_vtrnq_v: {
2292 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2293 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2294 Value *SV = nullptr;
2295
2296 for (unsigned vi = 0; vi != 2; ++vi) {
2297 SmallVector<int, 16> Indices;
2298 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2299 Indices.push_back(i+vi);
2300 Indices.push_back(i+e+vi);
2301 }
2302 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2303 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2304 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2305 }
2306 return SV;
2307 }
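// Worked example (editorial illustration): for 4-element inputs (e == 4) the
// vtrn loop above builds the masks {0,4,2,6} for vi == 0 and {1,5,3,7} for
// vi == 1, storing each transposed half through consecutive slots of the
// sret pointer in Ops[0].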
2308 case NEON::BI__builtin_neon_vtst_v:
2309 case NEON::BI__builtin_neon_vtstq_v: {
2310 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2311 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2312 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2313 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2314 ConstantAggregateZero::get(Ty));
2315 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2316 }
2317 case NEON::BI__builtin_neon_vuzp_v:
2318 case NEON::BI__builtin_neon_vuzpq_v: {
2319 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2320 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2321 Value *SV = nullptr;
2322
2323 for (unsigned vi = 0; vi != 2; ++vi) {
2324 SmallVector<int, 16> Indices;
2325 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2326 Indices.push_back(2*i+vi);
2327
2328 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2329 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2330 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2331 }
2332 return SV;
2333 }
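// Worked example (editorial illustration): for e == 4 the vuzp loop builds
// {0,2,4,6} (vi == 0, even lanes of the concatenated inputs) and {1,3,5,7}
// (vi == 1, odd lanes), again storing both results through Ops[0].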
2334 case NEON::BI__builtin_neon_vxarq_u64: {
2335 Function *F = CGM.getIntrinsic(Int);
2336 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2337 return EmitNeonCall(F, Ops, "");
2338 }
2339 case NEON::BI__builtin_neon_vzip_v:
2340 case NEON::BI__builtin_neon_vzipq_v: {
2341 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2342 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2343 Value *SV = nullptr;
2344
2345 for (unsigned vi = 0; vi != 2; ++vi) {
2346 SmallVector<int, 16> Indices;
2347 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2348 Indices.push_back((i + vi*e) >> 1);
2349 Indices.push_back(((i + vi*e) >> 1)+e);
2350 }
2351 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2352 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2353 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2354 }
2355 return SV;
2356 }
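// The (i + vi*e) >> 1 arithmetic above is the least obvious of the three
// shuffle-mask constructions, so here is a standalone sketch (not part of
// ARM.cpp) that reproduces it for a hypothetical 4-element vector; the element
// count `e` is an assumption chosen purely for illustration.
//
//   #include <cstdio>
//   #include <vector>
//
//   int main() {
//     const unsigned e = 4; // elements per input vector (assumed)
//     for (unsigned vi = 0; vi != 2; ++vi) {
//       std::vector<int> Indices;
//       for (unsigned i = 0; i != e; i += 2) {
//         Indices.push_back((i + vi * e) >> 1);       // lane of first input
//         Indices.push_back(((i + vi * e) >> 1) + e); // same lane, second input
//       }
//       for (int Idx : Indices)
//         std::printf("%d ", Idx); // vi==0 prints: 0 4 1 5   vi==1 prints: 2 6 3 7
//       std::printf("\n");
//     }
//   }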
2357 case NEON::BI__builtin_neon_vdot_s32:
2358 case NEON::BI__builtin_neon_vdot_u32:
2359 case NEON::BI__builtin_neon_vdotq_s32:
2360 case NEON::BI__builtin_neon_vdotq_u32: {
2361 auto *InputTy =
2362 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2363 llvm::Type *Tys[2] = { Ty, InputTy };
2364 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
2365 }
2366 case NEON::BI__builtin_neon_vfmlal_low_f16:
2367 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2368 auto *InputTy =
2369 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2370 llvm::Type *Tys[2] = { Ty, InputTy };
2371 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
2372 }
2373 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2374 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2375 auto *InputTy =
2376 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2377 llvm::Type *Tys[2] = { Ty, InputTy };
2378 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
2379 }
2380 case NEON::BI__builtin_neon_vfmlal_high_f16:
2381 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2382 auto *InputTy =
2383 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2384 llvm::Type *Tys[2] = { Ty, InputTy };
2385 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
2386 }
2387 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2388 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2389 auto *InputTy =
2390 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2391 llvm::Type *Tys[2] = { Ty, InputTy };
2392 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
2393 }
2394 case NEON::BI__builtin_neon_vmmlaq_s32:
2395 case NEON::BI__builtin_neon_vmmlaq_u32: {
2396 auto *InputTy =
2397 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2398 llvm::Type *Tys[2] = { Ty, InputTy };
2399 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
2400 }
2401 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2402 auto *InputTy =
2403 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2404 llvm::Type *Tys[2] = { Ty, InputTy };
2405 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
2406 }
2407 case NEON::BI__builtin_neon_vusdot_s32:
2408 case NEON::BI__builtin_neon_vusdotq_s32: {
2409 auto *InputTy =
2410 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2411 llvm::Type *Tys[2] = { Ty, InputTy };
2412 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
2413 }
2414 case NEON::BI__builtin_neon_vbfdot_f32:
2415 case NEON::BI__builtin_neon_vbfdotq_f32: {
2416 llvm::Type *InputTy =
2417 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2418 llvm::Type *Tys[2] = { Ty, InputTy };
2419 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
2420 }
2421 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2422 llvm::Type *Tys[1] = { Ty };
2423 Function *F = CGM.getIntrinsic(Int, Tys);
2424 return EmitNeonCall(F, Ops, "vcvtfp2bf");
2425 }
2426
2427 }
2428
2429 assert(Int && "Expected valid intrinsic number");
2430
2431 // Determine the type(s) of this overloaded AArch64 intrinsic.
2432 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
2433
2434 Value *Result = EmitNeonCall(F, Ops, NameHint);
2435 llvm::Type *ResultType = ConvertType(E->getType());
2436 // An AArch64 intrinsic may return a one-element vector; bitcast it to the
2437 // scalar type expected by the builtin.
2438 return Builder.CreateBitCast(Result, ResultType, NameHint);
2439}
2440
2441Value *
2442CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2443 const CmpInst::Predicate Pred,
2444 const Twine &Name) {
2445
2446 if (isa<FixedVectorType>(Ty)) {
2447 // Vector types are cast to i8 vectors. Recover original type.
2448 Op = Builder.CreateBitCast(Op, Ty);
2449 }
2450
2451 if (CmpInst::isFPPredicate(Pred)) {
2452 if (Pred == CmpInst::FCMP_OEQ)
2453 Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
2454 else
2455 Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
2456 } else {
2457 Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
2458 }
2459
2460 llvm::Type *ResTy = Ty;
2461 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2462 ResTy = FixedVectorType::get(
2463 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2464 VTy->getNumElements());
2465
2466 return Builder.CreateSExt(Op, ResTy, Name);
2467}
2468
2469static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2470 Value *ExtOp, Value *IndexOp,
2471 llvm::Type *ResTy, unsigned IntID,
2472 const char *Name) {
2473 SmallVector<Value *, 2> TblOps;
2474 if (ExtOp)
2475 TblOps.push_back(ExtOp);
2476
2477 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
2478 SmallVector<int, 16> Indices;
2479 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2480 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2481 Indices.push_back(2*i);
2482 Indices.push_back(2*i+1);
2483 }
2484
2485 int PairPos = 0, End = Ops.size() - 1;
2486 while (PairPos < End) {
2487 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2488 Ops[PairPos+1], Indices,
2489 Name));
2490 PairPos += 2;
2491 }
2492
2493 // If there's an odd number of 64-bit lookup table vectors, fill the high
2494 // 64 bits of the last 128-bit lookup table with zero.
2495 if (PairPos == End) {
2496 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2497 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2498 ZeroTbl, Indices, Name));
2499 }
2500
2501 Function *TblF;
2502 TblOps.push_back(IndexOp);
2503 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
2504
2505 return CGF.EmitNeonCall(TblF, TblOps, Name);
2506}
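// Worked example (editorial illustration): for the vtbl3 case further below,
// three 64-bit table vectors arrive in Ops[0..2]. The pairing loop
// concatenates Ops[0] and Ops[1] into one 128-bit table, the trailing Ops[2]
// is concatenated with a zero vector, and the caller-selected
// aarch64_neon_tbl2 intrinsic then consumes the two packed tables plus the
// index vector.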
2507
2508Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2509 unsigned Value;
2510 switch (BuiltinID) {
2511 default:
2512 return nullptr;
2513 case clang::ARM::BI__builtin_arm_nop:
2514 Value = 0;
2515 break;
2516 case clang::ARM::BI__builtin_arm_yield:
2517 case clang::ARM::BI__yield:
2518 Value = 1;
2519 break;
2520 case clang::ARM::BI__builtin_arm_wfe:
2521 case clang::ARM::BI__wfe:
2522 Value = 2;
2523 break;
2524 case clang::ARM::BI__builtin_arm_wfi:
2525 case clang::ARM::BI__wfi:
2526 Value = 3;
2527 break;
2528 case clang::ARM::BI__builtin_arm_sev:
2529 case clang::ARM::BI__sev:
2530 Value = 4;
2531 break;
2532 case clang::ARM::BI__builtin_arm_sevl:
2533 case clang::ARM::BI__sevl:
2534 Value = 5;
2535 break;
2536 }
2537
2538 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2539 llvm::ConstantInt::get(Int32Ty, Value));
2540}
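// For example, __builtin_arm_wfi() takes the Value == 3 branch above and
// lowers to a single "call void @llvm.arm.hint(i32 3)".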
2541
2542enum SpecialRegisterAccessKind {
2543 NormalRead,
2544 VolatileRead,
2545 Write,
2546};
2547
2548// Generates the IR for the read/write special register builtins.
2549// ValueType is the type of the value that is to be written or read,
2550// RegisterType is the type of the register being written to or read from.
2551static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2552 const CallExpr *E,
2553 llvm::Type *RegisterType,
2554 llvm::Type *ValueType,
2555 SpecialRegisterAccessKind AccessKind,
2556 StringRef SysReg = "") {
2557 // The write- and read-register intrinsics only support 32-, 64- and 128-bit operations.
2558 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2559 RegisterType->isIntegerTy(128)) &&
2560 "Unsupported size for register.");
2561
2562 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2563 CodeGen::CodeGenModule &CGM = CGF.CGM;
2564 LLVMContext &Context = CGM.getLLVMContext();
2565
2566 if (SysReg.empty()) {
2567 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2568 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2569 }
2570
2571 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2572 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2573 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2574
2575 llvm::Type *Types[] = { RegisterType };
2576
2577 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2578 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2579 && "Can't fit 64-bit value in 32-bit register");
2580
2581 if (AccessKind != Write) {
2582 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2583 llvm::Function *F = CGM.getIntrinsic(
2584 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2585 : Intrinsic::read_register,
2586 Types);
2587 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2588
2589 if (MixedTypes)
2590 // Read into 64 bit register and then truncate result to 32 bit.
2591 return Builder.CreateTrunc(Call, ValueType);
2592
2593 if (ValueType->isPointerTy())
2594 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2595 return Builder.CreateIntToPtr(Call, ValueType);
2596
2597 return Call;
2598 }
2599
2600 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2601 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2602 if (MixedTypes) {
2603 // Extend 32 bit write value to 64 bit to pass to write.
2604 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2605 return Builder.CreateCall(F, { Metadata, ArgValue });
2606 }
2607
2608 if (ValueType->isPointerTy()) {
2609 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2610 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2611 return Builder.CreateCall(F, { Metadata, ArgValue });
2612 }
2613
2614 return Builder.CreateCall(F, { Metadata, ArgValue });
2615}
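// For example, on 32-bit ARM __builtin_arm_rsr("cpsr") reaches this helper
// with AccessKind == VolatileRead and Int32Ty for both types, producing
// roughly:
//   %0 = call i32 @llvm.read_volatile_register.i32(metadata !0)
//   !0 = !{!"cpsr"}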
2616
2617/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2618/// argument that specifies the vector type.
2619static bool HasExtraNeonArgument(unsigned BuiltinID) {
2620 switch (BuiltinID) {
2621 default: break;
2622 case NEON::BI__builtin_neon_vget_lane_i8:
2623 case NEON::BI__builtin_neon_vget_lane_i16:
2624 case NEON::BI__builtin_neon_vget_lane_bf16:
2625 case NEON::BI__builtin_neon_vget_lane_i32:
2626 case NEON::BI__builtin_neon_vget_lane_i64:
2627 case NEON::BI__builtin_neon_vget_lane_mf8:
2628 case NEON::BI__builtin_neon_vget_lane_f32:
2629 case NEON::BI__builtin_neon_vgetq_lane_i8:
2630 case NEON::BI__builtin_neon_vgetq_lane_i16:
2631 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2632 case NEON::BI__builtin_neon_vgetq_lane_i32:
2633 case NEON::BI__builtin_neon_vgetq_lane_i64:
2634 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2635 case NEON::BI__builtin_neon_vgetq_lane_f32:
2636 case NEON::BI__builtin_neon_vduph_lane_bf16:
2637 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_mf8:
2640 case NEON::BI__builtin_neon_vset_lane_i16:
2641 case NEON::BI__builtin_neon_vset_lane_bf16:
2642 case NEON::BI__builtin_neon_vset_lane_i32:
2643 case NEON::BI__builtin_neon_vset_lane_i64:
2644 case NEON::BI__builtin_neon_vset_lane_f32:
2645 case NEON::BI__builtin_neon_vsetq_lane_i8:
2646 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2647 case NEON::BI__builtin_neon_vsetq_lane_i16:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_i32:
2650 case NEON::BI__builtin_neon_vsetq_lane_i64:
2651 case NEON::BI__builtin_neon_vsetq_lane_f32:
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 case NEON::BI__builtin_neon_vsha1cq_u32:
2654 case NEON::BI__builtin_neon_vsha1pq_u32:
2655 case NEON::BI__builtin_neon_vsha1mq_u32:
2656 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2657 case clang::ARM::BI_MoveToCoprocessor:
2658 case clang::ARM::BI_MoveToCoprocessor2:
2659 return false;
2660 }
2661 return true;
2662}
2663
2664Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2665 const CallExpr *E,
2666 ReturnValueSlot ReturnValue,
2667 llvm::Triple::ArchType Arch) {
2668 if (auto Hint = GetValueForARMHint(BuiltinID))
2669 return Hint;
2670
2671 if (BuiltinID == clang::ARM::BI__emit) {
2672 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2673 llvm::FunctionType *FTy =
2674 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2675
2677 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2678 llvm_unreachable("Sema will ensure that the parameter is constant");
2679
2680 llvm::APSInt Value = Result.Val.getInt();
2681 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2682
2683 llvm::InlineAsm *Emit =
2684 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2685 /*hasSideEffects=*/true)
2686 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2687 /*hasSideEffects=*/true);
2688
2689 return Builder.CreateCall(Emit);
2690 }
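// For example, __emit(0xbf00) on a Thumb target becomes the side-effecting
// inline-asm blob ".inst.n 0xbf00" (the Thumb NOP encoding), with the operand
// truncated to 16 bits as shown above.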
2691
2692 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2693 Value *Option = EmitScalarExpr(E->getArg(0));
2694 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2695 }
2696
2697 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2698 Value *Address = EmitScalarExpr(E->getArg(0));
2699 Value *RW = EmitScalarExpr(E->getArg(1));
2700 Value *IsData = EmitScalarExpr(E->getArg(2));
2701
2702 // Locality is not supported on ARM target
2703 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2704
2705 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2706 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2707 }
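// For example, __builtin_arm_prefetch(p, /*rw=*/1, /*data=*/1) lowers to
// roughly "call void @llvm.prefetch.p0(ptr %p, i32 1, i32 3, i32 1)", with
// the locality argument pinned to 3 as noted above.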
2708
2709 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2710 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2711 return Builder.CreateCall(
2712 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2713 }
2714
2715 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2716 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2717 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2718 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2719 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2720 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2721 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2722 return Res;
2723 }
2724
2725
2726 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2727 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2728 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2729 }
2730 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2731 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2732 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2733 "cls");
2734 }
2735
2736 if (BuiltinID == clang::ARM::BI__clear_cache) {
2737 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2738 const FunctionDecl *FD = E->getDirectCallee();
2739 Value *Ops[2];
2740 for (unsigned i = 0; i < 2; i++)
2741 Ops[i] = EmitScalarExpr(E->getArg(i));
2742 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2743 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2744 StringRef Name = FD->getName();
2745 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2746 }
2747
2748 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2749 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2750 Function *F;
2751
2752 switch (BuiltinID) {
2753 default: llvm_unreachable("unexpected builtin");
2754 case clang::ARM::BI__builtin_arm_mcrr:
2755 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2756 break;
2757 case clang::ARM::BI__builtin_arm_mcrr2:
2758 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2759 break;
2760 }
2761
2762 // The MCRR{2} instruction has 5 operands, but
2763 // the intrinsic has only 4: Rt and Rt2 are
2764 // represented as a single unsigned 64-bit
2765 // integer in the intrinsic definition, even
2766 // though the instruction treats them as two
2767 // separate 32-bit registers.
2768
2769 Value *Coproc = EmitScalarExpr(E->getArg(0));
2770 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2771 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2772 Value *CRm = EmitScalarExpr(E->getArg(3));
2773
2774 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2775 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2776 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2777 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2778
2779 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2780 }
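// In other words, the single 64-bit RtAndRt2 argument is split here: Rt is
// its low 32 bits and Rt2 its high 32 bits, matching the two GPR operands of
// the MCRR{2} instruction.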
2781
2782 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2783 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2784 Function *F;
2785
2786 switch (BuiltinID) {
2787 default: llvm_unreachable("unexpected builtin");
2788 case clang::ARM::BI__builtin_arm_mrrc:
2789 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2790 break;
2791 case clang::ARM::BI__builtin_arm_mrrc2:
2792 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2793 break;
2794 }
2795
2796 Value *Coproc = EmitScalarExpr(E->getArg(0));
2797 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2798 Value *CRm = EmitScalarExpr(E->getArg(2));
2799 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2800
2801 // The intrinsic returns the unsigned 64-bit result as two
2802 // 32-bit integers, which are recombined below.
2803
2804 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2805 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2806 Rt = Builder.CreateZExt(Rt, Int64Ty);
2807 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2808
2809 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2810 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2811 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2812
2813 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2814 }
2815
2816 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2817 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2818 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2819 getContext().getTypeSize(E->getType()) == 64) ||
2820 BuiltinID == clang::ARM::BI__ldrexd) {
2821 Function *F;
2822
2823 switch (BuiltinID) {
2824 default: llvm_unreachable("unexpected builtin");
2825 case clang::ARM::BI__builtin_arm_ldaex:
2826 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2827 break;
2828 case clang::ARM::BI__builtin_arm_ldrexd:
2829 case clang::ARM::BI__builtin_arm_ldrex:
2830 case clang::ARM::BI__ldrexd:
2831 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2832 break;
2833 }
2834
2835 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2836 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2837
2838 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2839 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2840 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2841 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2842
2843 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2844 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2845 Val = Builder.CreateOr(Val, Val1);
2846 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2847 }
2848
2849 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2850 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2851 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2852
2853 QualType Ty = E->getType();
2854 llvm::Type *RealResTy = ConvertType(Ty);
2855 llvm::Type *IntTy =
2856 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2857
2858 Function *F = CGM.getIntrinsic(
2859 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2860 : Intrinsic::arm_ldrex,
2861 UnqualPtrTy);
2862 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2863 Val->addParamAttr(
2864 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2865
2866 if (RealResTy->isPointerTy())
2867 return Builder.CreateIntToPtr(Val, RealResTy);
2868 else {
2869 llvm::Type *IntResTy = llvm::IntegerType::get(
2870 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2871 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2872 RealResTy);
2873 }
2874 }
2875
2876 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2877 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2878 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2879 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2880 Function *F = CGM.getIntrinsic(
2881 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2882 : Intrinsic::arm_strexd);
2883 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2884
2885 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2886 Value *Val = EmitScalarExpr(E->getArg(0));
2887 Builder.CreateStore(Val, Tmp);
2888
2889 Address LdPtr = Tmp.withElementType(STy);
2890 Val = Builder.CreateLoad(LdPtr);
2891
2892 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2893 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2894 Value *StPtr = EmitScalarExpr(E->getArg(1));
2895 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2896 }
2897
2898 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2899 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2900 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2901 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2902
2903 QualType Ty = E->getArg(0)->getType();
2904 llvm::Type *StoreTy =
2905 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2906
2907 if (StoreVal->getType()->isPointerTy())
2908 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2909 else {
2910 llvm::Type *IntTy = llvm::IntegerType::get(
2911 getLLVMContext(),
2912 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2913 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2914 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2915 }
2916
2917 Function *F = CGM.getIntrinsic(
2918 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2919 : Intrinsic::arm_strex,
2920 StoreAddr->getType());
2921
2922 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2923 CI->addParamAttr(
2924 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2925 return CI;
2926 }
2927
2928 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2929 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2930 return Builder.CreateCall(F);
2931 }
2932
2933 // CRC32
2934 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2935 switch (BuiltinID) {
2936 case clang::ARM::BI__builtin_arm_crc32b:
2937 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2938 case clang::ARM::BI__builtin_arm_crc32cb:
2939 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2940 case clang::ARM::BI__builtin_arm_crc32h:
2941 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2942 case clang::ARM::BI__builtin_arm_crc32ch:
2943 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2944 case clang::ARM::BI__builtin_arm_crc32w:
2945 case clang::ARM::BI__builtin_arm_crc32d:
2946 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2947 case clang::ARM::BI__builtin_arm_crc32cw:
2948 case clang::ARM::BI__builtin_arm_crc32cd:
2949 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2950 }
2951
2952 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2953 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2954 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2955
2956 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2957 // intrinsics, hence we need different codegen for these cases.
2958 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2959 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2960 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2961 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2962 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2963 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2964
2965 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2966 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2967 return Builder.CreateCall(F, {Res, Arg1b});
2968 } else {
2969 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2970
2971 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2972 return Builder.CreateCall(F, {Arg0, Arg1});
2973 }
2974 }
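// For example, __builtin_arm_crc32d(crc, data) becomes two chained
// llvm.arm.crc32w calls: one over the low 32 bits of data, whose result is
// then fed together with the high 32 bits into the second call.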
2975
2976 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2977 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2978 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2979 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2980 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2981 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2982
2983 SpecialRegisterAccessKind AccessKind = Write;
2984 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2985 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2986 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2987 AccessKind = VolatileRead;
2988
2989 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2990 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2991
2992 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2993 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2994
2995 llvm::Type *ValueType;
2996 llvm::Type *RegisterType;
2997 if (IsPointerBuiltin) {
2998 ValueType = VoidPtrTy;
2999 RegisterType = Int32Ty;
3000 } else if (Is64Bit) {
3001 ValueType = RegisterType = Int64Ty;
3002 } else {
3003 ValueType = RegisterType = Int32Ty;
3004 }
3005
3006 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
3007 AccessKind);
3008 }
3009
3010 if (BuiltinID == ARM::BI__builtin_sponentry) {
3011 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3012 return Builder.CreateCall(F);
3013 }
3014
3015 // Handle MSVC intrinsics before argument evaluation to prevent double
3016 // evaluation.
3017 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3018 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
3019
3020 // Deal with MVE builtins
3021 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3022 return Result;
3023 // Handle CDE builtins
3024 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3025 return Result;
3026
3027 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
3028 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3029 return P.first == BuiltinID;
3030 });
3031 if (It != end(NEONEquivalentIntrinsicMap))
3032 BuiltinID = It->second;
3033
3034 // Find out if any arguments are required to be integer constant
3035 // expressions.
3036 unsigned ICEArguments = 0;
3037 ASTContext::GetBuiltinTypeError Error;
3038 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3039 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3040
3041 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3042 return Builder.getInt32(addr.getAlignment().getQuantity());
3043 };
3044
3045 Address PtrOp0 = Address::invalid();
3046 Address PtrOp1 = Address::invalid();
3047 SmallVector<Value *, 4> Ops;
3048 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3049 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3050 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3051 if (i == 0) {
3052 switch (BuiltinID) {
3053 case NEON::BI__builtin_neon_vld1_v:
3054 case NEON::BI__builtin_neon_vld1q_v:
3055 case NEON::BI__builtin_neon_vld1q_lane_v:
3056 case NEON::BI__builtin_neon_vld1_lane_v:
3057 case NEON::BI__builtin_neon_vld1_dup_v:
3058 case NEON::BI__builtin_neon_vld1q_dup_v:
3059 case NEON::BI__builtin_neon_vst1_v:
3060 case NEON::BI__builtin_neon_vst1q_v:
3061 case NEON::BI__builtin_neon_vst1q_lane_v:
3062 case NEON::BI__builtin_neon_vst1_lane_v:
3063 case NEON::BI__builtin_neon_vst2_v:
3064 case NEON::BI__builtin_neon_vst2q_v:
3065 case NEON::BI__builtin_neon_vst2_lane_v:
3066 case NEON::BI__builtin_neon_vst2q_lane_v:
3067 case NEON::BI__builtin_neon_vst3_v:
3068 case NEON::BI__builtin_neon_vst3q_v:
3069 case NEON::BI__builtin_neon_vst3_lane_v:
3070 case NEON::BI__builtin_neon_vst3q_lane_v:
3071 case NEON::BI__builtin_neon_vst4_v:
3072 case NEON::BI__builtin_neon_vst4q_v:
3073 case NEON::BI__builtin_neon_vst4_lane_v:
3074 case NEON::BI__builtin_neon_vst4q_lane_v:
3075 // Get the alignment for the argument in addition to the value;
3076 // we'll use it later.
3077 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3078 Ops.push_back(PtrOp0.emitRawPointer(*this));
3079 continue;
3080 }
3081 }
3082 if (i == 1) {
3083 switch (BuiltinID) {
3084 case NEON::BI__builtin_neon_vld2_v:
3085 case NEON::BI__builtin_neon_vld2q_v:
3086 case NEON::BI__builtin_neon_vld3_v:
3087 case NEON::BI__builtin_neon_vld3q_v:
3088 case NEON::BI__builtin_neon_vld4_v:
3089 case NEON::BI__builtin_neon_vld4q_v:
3090 case NEON::BI__builtin_neon_vld2_lane_v:
3091 case NEON::BI__builtin_neon_vld2q_lane_v:
3092 case NEON::BI__builtin_neon_vld3_lane_v:
3093 case NEON::BI__builtin_neon_vld3q_lane_v:
3094 case NEON::BI__builtin_neon_vld4_lane_v:
3095 case NEON::BI__builtin_neon_vld4q_lane_v:
3096 case NEON::BI__builtin_neon_vld2_dup_v:
3097 case NEON::BI__builtin_neon_vld2q_dup_v:
3098 case NEON::BI__builtin_neon_vld3_dup_v:
3099 case NEON::BI__builtin_neon_vld3q_dup_v:
3100 case NEON::BI__builtin_neon_vld4_dup_v:
3101 case NEON::BI__builtin_neon_vld4q_dup_v:
3102 // Get the alignment for the argument in addition to the value;
3103 // we'll use it later.
3104 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3105 Ops.push_back(PtrOp1.emitRawPointer(*this));
3106 continue;
3107 }
3108 }
3109
3110 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
3111 }
3112
3113 switch (BuiltinID) {
3114 default: break;
3115
3116 case NEON::BI__builtin_neon_vget_lane_i8:
3117 case NEON::BI__builtin_neon_vget_lane_i16:
3118 case NEON::BI__builtin_neon_vget_lane_i32:
3119 case NEON::BI__builtin_neon_vget_lane_i64:
3120 case NEON::BI__builtin_neon_vget_lane_bf16:
3121 case NEON::BI__builtin_neon_vget_lane_f32:
3122 case NEON::BI__builtin_neon_vgetq_lane_i8:
3123 case NEON::BI__builtin_neon_vgetq_lane_i16:
3124 case NEON::BI__builtin_neon_vgetq_lane_i32:
3125 case NEON::BI__builtin_neon_vgetq_lane_i64:
3126 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3127 case NEON::BI__builtin_neon_vgetq_lane_f32:
3128 case NEON::BI__builtin_neon_vduph_lane_bf16:
3129 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3130 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3131
3132 case NEON::BI__builtin_neon_vrndns_f32: {
3133 Value *Arg = EmitScalarExpr(E->getArg(0));
3134 llvm::Type *Tys[] = {Arg->getType()};
3135 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
3136 return Builder.CreateCall(F, {Arg}, "vrndn"); }
3137
3138 case NEON::BI__builtin_neon_vset_lane_i8:
3139 case NEON::BI__builtin_neon_vset_lane_i16:
3140 case NEON::BI__builtin_neon_vset_lane_i32:
3141 case NEON::BI__builtin_neon_vset_lane_i64:
3142 case NEON::BI__builtin_neon_vset_lane_bf16:
3143 case NEON::BI__builtin_neon_vset_lane_f32:
3144 case NEON::BI__builtin_neon_vsetq_lane_i8:
3145 case NEON::BI__builtin_neon_vsetq_lane_i16:
3146 case NEON::BI__builtin_neon_vsetq_lane_i32:
3147 case NEON::BI__builtin_neon_vsetq_lane_i64:
3148 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3149 case NEON::BI__builtin_neon_vsetq_lane_f32:
3150 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3151
3152 case NEON::BI__builtin_neon_vsha1h_u32:
3153 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3154 "vsha1h");
3155 case NEON::BI__builtin_neon_vsha1cq_u32:
3156 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3157 "vsha1h");
3158 case NEON::BI__builtin_neon_vsha1pq_u32:
3159 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3160 "vsha1h");
3161 case NEON::BI__builtin_neon_vsha1mq_u32:
3162 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3163 "vsha1h");
3164
3165 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3166 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3167 "vcvtbfp2bf");
3168 }
3169
3170 // The ARM _MoveToCoprocessor builtins put the input register value as
3171 // the first argument, but the LLVM intrinsic expects it as the third one.
3172 case clang::ARM::BI_MoveToCoprocessor:
3173 case clang::ARM::BI_MoveToCoprocessor2: {
3174 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3175 ? Intrinsic::arm_mcr
3176 : Intrinsic::arm_mcr2);
3177 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3178 Ops[3], Ops[4], Ops[5]});
3179 }
3180 }
3181
3182 // Get the last argument, which specifies the vector type.
3183 assert(HasExtraArg);
3184 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3185 std::optional<llvm::APSInt> Result =
3186 Arg->getIntegerConstantExpr(getContext());
3187 if (!Result)
3188 return nullptr;
3189
3190 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3191 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3192 // Determine the overloaded type of this builtin.
3193 llvm::Type *Ty;
3194 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3195 Ty = FloatTy;
3196 else
3197 Ty = DoubleTy;
3198
3199 // Determine whether this is an unsigned conversion or not.
3200 bool usgn = Result->getZExtValue() == 1;
3201 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3202
3203 // Call the appropriate intrinsic.
3204 Function *F = CGM.getIntrinsic(Int, Ty);
3205 return Builder.CreateCall(F, Ops, "vcvtr");
3206 }
3207
3208 // Determine the type of this overloaded NEON intrinsic.
3209 NeonTypeFlags Type = Result->getZExtValue();
3210 bool usgn = Type.isUnsigned();
3211 bool rightShift = false;
3212
3213 llvm::FixedVectorType *VTy =
3214 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
3215 getTarget().hasBFloat16Type());
3216 llvm::Type *Ty = VTy;
3217 if (!Ty)
3218 return nullptr;
3219
3220 // Many NEON builtins have identical semantics and uses in ARM and
3221 // AArch64. Emit these in a single function.
3222 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3223 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3224 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3225 if (Builtin)
3226 return EmitCommonNeonBuiltinExpr(
3227 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3228 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3229
3230 unsigned Int;
3231 switch (BuiltinID) {
3232 default: return nullptr;
3233 case NEON::BI__builtin_neon_vld1q_lane_v:
3234 // Handle 64-bit integer elements as a special case. Use shuffles of
3235 // one-element vectors to avoid poor code for i64 in the backend.
3236 if (VTy->getElementType()->isIntegerTy(64)) {
3237 // Extract the other lane.
3238 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3239 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3240 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3241 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3242 // Load the value as a one-element vector.
3243 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3244 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3245 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3246 Value *Align = getAlignmentValue32(PtrOp0);
3247 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3248 // Combine them.
3249 int Indices[] = {1 - Lane, Lane};
3250 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3251 }
3252 [[fallthrough]];
3253 case NEON::BI__builtin_neon_vld1_lane_v: {
3254 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3255 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
3256 Value *Ld = Builder.CreateLoad(PtrOp0);
3257 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3258 }
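// Worked example (editorial illustration) of the i64 path above with Lane == 0:
// the first shuffle extracts the preserved element 1 into a <1 x i64>, the
// arm.neon.vld1 call loads the new lane as another <1 x i64>, and the final
// mask {1, 0} places the freshly loaded value in lane 0 and the preserved
// value in lane 1.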
3259 case NEON::BI__builtin_neon_vqrshrn_n_v:
3260 Int =
3261 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3262 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3263 1, true);
3264 case NEON::BI__builtin_neon_vqrshrun_n_v:
3265 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3266 Ops, "vqrshrun_n", 1, true);
3267 case NEON::BI__builtin_neon_vqshrn_n_v:
3268 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3269 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3270 1, true);
3271 case NEON::BI__builtin_neon_vqshrun_n_v:
3272 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3273 Ops, "vqshrun_n", 1, true);
3274 case NEON::BI__builtin_neon_vrecpe_v:
3275 case NEON::BI__builtin_neon_vrecpeq_v:
3276 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3277 Ops, "vrecpe");
3278 case NEON::BI__builtin_neon_vrshrn_n_v:
3279 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3280 Ops, "vrshrn_n", 1, true);
3281 case NEON::BI__builtin_neon_vrsra_n_v:
3282 case NEON::BI__builtin_neon_vrsraq_n_v:
3283 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3284 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3285 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3286 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3287 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3288 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3289 case NEON::BI__builtin_neon_vsri_n_v:
3290 case NEON::BI__builtin_neon_vsriq_n_v:
3291 rightShift = true;
3292 [[fallthrough]];
3293 case NEON::BI__builtin_neon_vsli_n_v:
3294 case NEON::BI__builtin_neon_vsliq_n_v:
3295 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3296 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3297 Ops, "vsli_n");
3298 case NEON::BI__builtin_neon_vsra_n_v:
3299 case NEON::BI__builtin_neon_vsraq_n_v:
3300 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3301 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3302 return Builder.CreateAdd(Ops[0], Ops[1]);
3303 case NEON::BI__builtin_neon_vst1q_lane_v:
3304 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3305 // a one-element vector and avoid poor code for i64 in the backend.
3306 if (VTy->getElementType()->isIntegerTy(64)) {
3307 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3308 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3309 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3310 Ops[2] = getAlignmentValue32(PtrOp0);
3311 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3312 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3313 Tys), Ops);
3314 }
3315 [[fallthrough]];
3316 case NEON::BI__builtin_neon_vst1_lane_v: {
3317 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3318 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3319 return Builder.CreateStore(Ops[1],
3320 PtrOp0.withElementType(Ops[1]->getType()));
3321 }
3322 case NEON::BI__builtin_neon_vtbl1_v:
3323 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3324 Ops, "vtbl1");
3325 case NEON::BI__builtin_neon_vtbl2_v:
3326 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3327 Ops, "vtbl2");
3328 case NEON::BI__builtin_neon_vtbl3_v:
3329 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3330 Ops, "vtbl3");
3331 case NEON::BI__builtin_neon_vtbl4_v:
3332 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3333 Ops, "vtbl4");
3334 case NEON::BI__builtin_neon_vtbx1_v:
3335 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3336 Ops, "vtbx1");
3337 case NEON::BI__builtin_neon_vtbx2_v:
3338 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3339 Ops, "vtbx2");
3340 case NEON::BI__builtin_neon_vtbx3_v:
3341 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3342 Ops, "vtbx3");
3343 case NEON::BI__builtin_neon_vtbx4_v:
3344 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3345 Ops, "vtbx4");
3346 }
3347}
3348
3349template<typename Integer>
3350static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3351 return E->getIntegerConstantExpr(Context)->getExtValue();
3352}
3353
3354static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3355 llvm::Type *T, bool Unsigned) {
3356 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3357 // which finds it convenient to specify signed/unsigned as a boolean flag.
3358 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3359}
3360
3361static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3362 uint32_t Shift, bool Unsigned) {
3363 // MVE helper function for integer shift right. This must handle signed vs
3364 // unsigned, and also deal specially with the case where the shift count is
3365 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3366 // undefined behavior, but in MVE it's legal, so we must convert it to code
3367 // that is not undefined in IR.
3368 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3369 ->getElementType()
3370 ->getPrimitiveSizeInBits();
3371 if (Shift == LaneBits) {
3372 // An unsigned shift of the full lane size always generates zero, so we can
3373 // simply emit a zero vector. A signed shift of the full lane size does the
3374 // same thing as shifting by one bit fewer.
3375 if (Unsigned)
3376 return llvm::Constant::getNullValue(V->getType());
3377 else
3378 --Shift;
3379 }
3380 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3381}
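// For example, shifting 16-bit lanes right by 16: the unsigned case folds to
// an all-zero vector, while the signed case is rewritten as an ashr by 15,
// leaving each lane equal to 0 or -1 according to its sign bit.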
3382
3383static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3384 // MVE-specific helper function for a vector splat, which infers the element
3385 // count of the output vector by knowing that MVE vectors are all 128 bits
3386 // wide.
3387 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3388 return Builder.CreateVectorSplat(Elements, V);
3389}
3390
3391static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3392 CodeGenFunction *CGF,
3393 llvm::Value *V,
3394 llvm::Type *DestType) {
3395 // Convert one MVE vector type into another by reinterpreting its in-register
3396 // format.
3397 //
3398 // Little-endian, this is identical to a bitcast (which reinterprets the
3399 // memory format). But big-endian, they're not necessarily the same, because
3400 // the register and memory formats map to each other differently depending on
3401 // the lane size.
3402 //
3403 // We generate a bitcast whenever we can (if we're little-endian, or if the
3404 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3405 // that performs the different kind of reinterpretation.
3406 if (CGF->getTarget().isBigEndian() &&
3407 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3408 return Builder.CreateCall(
3409 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3410 {DestType, V->getType()}),
3411 V);
3412 } else {
3413 return Builder.CreateBitCast(V, DestType);
3414 }
3415}
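// For example, reinterpreting a <8 x i16> as <4 x i32> on a big-endian target
// goes through llvm.arm.mve.vreinterpretq, whereas on little-endian targets
// (or when the lane sizes match) it is emitted as a plain bitcast.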
3416
3417static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3418 // Make a shufflevector that extracts every other element of a vector (evens
3419 // or odds, as desired).
3420 SmallVector<int, 16> Indices;
3421 unsigned InputElements =
3422 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3423 for (unsigned i = 0; i < InputElements; i += 2)
3424 Indices.push_back(i + Odd);
3425 return Builder.CreateShuffleVector(V, Indices);
3426}
3427
3428static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3429 llvm::Value *V1) {
3430 // Make a shufflevector that interleaves two vectors element by element.
3431 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3432 SmallVector<int, 16> Indices;
3433 unsigned InputElements =
3434 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3435 for (unsigned i = 0; i < InputElements; i++) {
3436 Indices.push_back(i);
3437 Indices.push_back(i + InputElements);
3438 }
3439 return Builder.CreateShuffleVector(V0, V1, Indices);
3440}
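// For example, zipping two 4-element vectors uses the mask {0,4,1,5,2,6,3,7},
// and unzipping an 8-element vector with Odd == false selects {0,2,4,6}.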
3441
3442template<unsigned HighBit, unsigned OtherBits>
3443static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3444 // MVE-specific helper function to make a vector splat of a constant such as
3445 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
3446 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3447 unsigned LaneBits = T->getPrimitiveSizeInBits();
3448 uint32_t Value = HighBit << (LaneBits - 1);
3449 if (OtherBits)
3450 Value |= (1UL << (LaneBits - 1)) - 1;
3451 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3452 return ARMMVEVectorSplat(Builder, Lane);
3453}
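// For example, with 32-bit lanes ARMMVEConstantSplat<1, 0> splats 0x80000000
// (INT_MIN), <1, 1> splats 0xFFFFFFFF (UINT_MAX) and <0, 1> splats 0x7FFFFFFF
// (INT_MAX) across a 4-element vector.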
3454
3455static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3456 llvm::Value *V,
3457 unsigned ReverseWidth) {
3458 // MVE-specific helper function which reverses the elements of a
3459 // vector within every (ReverseWidth)-bit collection of lanes.
3460 SmallVector<int, 16> Indices;
3461 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3462 unsigned Elements = 128 / LaneSize;
3463 unsigned Mask = ReverseWidth / LaneSize - 1;
3464 for (unsigned i = 0; i < Elements; i++)
3465 Indices.push_back(i ^ Mask);
3466 return Builder.CreateShuffleVector(V, Indices);
3467}
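// For example, with 8-bit lanes and ReverseWidth == 64 the mask is 7, so the
// i ^ 7 indexing reverses each run of eight byte lanes, i.e. a vrev64.8-style
// element reversal.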
3468
3469Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3470 const CallExpr *E,
3471 ReturnValueSlot ReturnValue,
3472 llvm::Triple::ArchType Arch) {
3473 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3474 Intrinsic::ID IRIntr;
3475 unsigned NumVectors;
3476
3477 // Code autogenerated by Tablegen will handle all the simple builtins.
3478 switch (BuiltinID) {
3479 #include "clang/Basic/arm_mve_builtin_cg.inc"
3480
3481 // If we didn't match an MVE builtin id at all, go back to the
3482 // main EmitARMBuiltinExpr.
3483 default:
3484 return nullptr;
3485 }
3486
3487 // Anything that breaks from that switch is an MVE builtin that
3488 // needs handwritten code to generate.
3489
3490 switch (CustomCodeGenType) {
3491
3492 case CustomCodeGen::VLD24: {
3493 llvm::SmallVector<Value *, 4> Ops;
3494 llvm::SmallVector<llvm::Type *, 4> Tys;
3495
3496 auto MvecCType = E->getType();
3497 auto MvecLType = ConvertType(MvecCType);
3498 assert(MvecLType->isStructTy() &&
3499 "Return type for vld[24]q should be a struct");
3500 assert(MvecLType->getStructNumElements() == 1 &&
3501 "Return-type struct for vld[24]q should have one element");
3502 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3503 assert(MvecLTypeInner->isArrayTy() &&
3504 "Return-type struct for vld[24]q should contain an array");
3505 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3506 "Array member of return-type struct vld[24]q has wrong length");
3507 auto VecLType = MvecLTypeInner->getArrayElementType();
3508
3509 Tys.push_back(VecLType);
3510
3511 auto Addr = E->getArg(0);
3512 Ops.push_back(EmitScalarExpr(Addr));
3513 Tys.push_back(ConvertType(Addr->getType()));
3514
3515 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3516 Value *LoadResult = Builder.CreateCall(F, Ops);
3517 Value *MvecOut = PoisonValue::get(MvecLType);
3518 for (unsigned i = 0; i < NumVectors; ++i) {
3519 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3520 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3521 }
3522
3523 if (ReturnValue.isNull())
3524 return MvecOut;
3525 else
3526 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3527 }
3528
3529 case CustomCodeGen::VST24: {
3530 llvm::SmallVector<Value *, 4> Ops;
3531 llvm::SmallVector<llvm::Type *, 4> Tys;
3532
3533 auto Addr = E->getArg(0);
3534 Ops.push_back(EmitScalarExpr(Addr));
3535 Tys.push_back(ConvertType(Addr->getType()));
3536
3537 auto MvecCType = E->getArg(1)->getType();
3538 auto MvecLType = ConvertType(MvecCType);
3539 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3540 assert(MvecLType->getStructNumElements() == 1 &&
3541 "Data-type struct for vst2q should have one element");
3542 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3543 assert(MvecLTypeInner->isArrayTy() &&
3544 "Data-type struct for vst2q should contain an array");
3545 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3546 "Array member of return-type struct vld[24]q has wrong length");
3547 auto VecLType = MvecLTypeInner->getArrayElementType();
3548
3549 Tys.push_back(VecLType);
3550
3551 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3552 EmitAggExpr(E->getArg(1), MvecSlot);
3553 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3554 for (unsigned i = 0; i < NumVectors; i++)
3555 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3556
3557 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3558 Value *ToReturn = nullptr;
3559 for (unsigned i = 0; i < NumVectors; i++) {
3560 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3561 ToReturn = Builder.CreateCall(F, Ops);
3562 Ops.pop_back();
3563 }
3564 return ToReturn;
3565 }
3566 }
3567 llvm_unreachable("unknown custom codegen type.");
3568}
3569
3570Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3571 const CallExpr *E,
3572 ReturnValueSlot ReturnValue,
3573 llvm::Triple::ArchType Arch) {
3574 switch (BuiltinID) {
3575 default:
3576 return nullptr;
3577#include "clang/Basic/arm_cde_builtin_cg.inc"
3578 }
3579}
3580
3581static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3582 const CallExpr *E,
3583 SmallVectorImpl<Value *> &Ops,
3584 llvm::Triple::ArchType Arch) {
3585 unsigned int Int = 0;
3586 const char *s = nullptr;
3587
3588 switch (BuiltinID) {
3589 default:
3590 return nullptr;
3591 case NEON::BI__builtin_neon_vtbl1_v:
3592 case NEON::BI__builtin_neon_vqtbl1_v:
3593 case NEON::BI__builtin_neon_vqtbl1q_v:
3594 case NEON::BI__builtin_neon_vtbl2_v:
3595 case NEON::BI__builtin_neon_vqtbl2_v:
3596 case NEON::BI__builtin_neon_vqtbl2q_v:
3597 case NEON::BI__builtin_neon_vtbl3_v:
3598 case NEON::BI__builtin_neon_vqtbl3_v:
3599 case NEON::BI__builtin_neon_vqtbl3q_v:
3600 case NEON::BI__builtin_neon_vtbl4_v:
3601 case NEON::BI__builtin_neon_vqtbl4_v:
3602 case NEON::BI__builtin_neon_vqtbl4q_v:
3603 break;
3604 case NEON::BI__builtin_neon_vtbx1_v:
3605 case NEON::BI__builtin_neon_vqtbx1_v:
3606 case NEON::BI__builtin_neon_vqtbx1q_v:
3607 case NEON::BI__builtin_neon_vtbx2_v:
3608 case NEON::BI__builtin_neon_vqtbx2_v:
3609 case NEON::BI__builtin_neon_vqtbx2q_v:
3610 case NEON::BI__builtin_neon_vtbx3_v:
3611 case NEON::BI__builtin_neon_vqtbx3_v:
3612 case NEON::BI__builtin_neon_vqtbx3q_v:
3613 case NEON::BI__builtin_neon_vtbx4_v:
3614 case NEON::BI__builtin_neon_vqtbx4_v:
3615 case NEON::BI__builtin_neon_vqtbx4q_v:
3616 break;
3617 }
3618
3619 assert(E->getNumArgs() >= 3);
3620
3621 // Get the last argument, which specifies the vector type.
3622 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3623 std::optional<llvm::APSInt> Result =
3624 Arg->getIntegerConstantExpr(CGF.getContext());
3625 if (!Result)
3626 return nullptr;
3627
3628 // Determine the type of this overloaded NEON intrinsic.
3629 NeonTypeFlags Type = Result->getZExtValue();
3630 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3631 if (!Ty)
3632 return nullptr;
3633
3634 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3635
3636 // AArch64 scalar builtins are not overloaded; they do not have an extra
3637 // argument that specifies the vector type, so each case is handled separately.
3638 switch (BuiltinID) {
3639 case NEON::BI__builtin_neon_vtbl1_v: {
3640 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3641 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3642 }
3643 case NEON::BI__builtin_neon_vtbl2_v: {
3644 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3645 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3646 }
3647 case NEON::BI__builtin_neon_vtbl3_v: {
3648 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3649 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3650 }
3651 case NEON::BI__builtin_neon_vtbl4_v: {
3652 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3653 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3654 }
3655 case NEON::BI__builtin_neon_vtbx1_v: {
3656 Value *TblRes =
3657 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3658 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3659
3660 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3661 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3662 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3663
3664 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3665 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3666 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3667 }
3668 case NEON::BI__builtin_neon_vtbx2_v: {
3669 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3670 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3671 }
3672 case NEON::BI__builtin_neon_vtbx3_v: {
3673 Value *TblRes =
3674 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3675 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3676
3677 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3678 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3679 TwentyFourV);
3680 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3681
3682 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3683 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3684 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3685 }
3686 case NEON::BI__builtin_neon_vtbx4_v: {
3687 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3688 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3689 }
3690 case NEON::BI__builtin_neon_vqtbl1_v:
3691 case NEON::BI__builtin_neon_vqtbl1q_v:
3692 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3693 case NEON::BI__builtin_neon_vqtbl2_v:
3694 case NEON::BI__builtin_neon_vqtbl2q_v:
3695 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3696 case NEON::BI__builtin_neon_vqtbl3_v:
3697 case NEON::BI__builtin_neon_vqtbl3q_v:
3698 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3699 case NEON::BI__builtin_neon_vqtbl4_v:
3700 case NEON::BI__builtin_neon_vqtbl4q_v:
3701 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3702 case NEON::BI__builtin_neon_vqtbx1_v:
3703 case NEON::BI__builtin_neon_vqtbx1q_v:
3704 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3705 case NEON::BI__builtin_neon_vqtbx2_v:
3706 case NEON::BI__builtin_neon_vqtbx2q_v:
3707 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3708 case NEON::BI__builtin_neon_vqtbx3_v:
3709 case NEON::BI__builtin_neon_vqtbx3q_v:
3710 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3711 case NEON::BI__builtin_neon_vqtbx4_v:
3712 case NEON::BI__builtin_neon_vqtbx4q_v:
3713 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3715 }
3716
3717 if (!Int)
3718 return nullptr;
3719
3720 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3721 return CGF.EmitNeonCall(F, Ops, s);
3722}
3723
3724Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3725 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3726 Op = Builder.CreateBitCast(Op, Int16Ty);
3727 Value *V = PoisonValue::get(VTy);
3728 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3729 Op = Builder.CreateInsertElement(V, Op, CI);
3730 return Op;
3731}
3732
3733/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3734/// access builtin. Only required if it can't be inferred from the base pointer
3735/// operand.
3736llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3737 switch (TypeFlags.getMemEltType()) {
3738 case SVETypeFlags::MemEltTyDefault:
3739 return getEltType(TypeFlags);
3740 case SVETypeFlags::MemEltTyInt8:
3741 return Builder.getInt8Ty();
3742 case SVETypeFlags::MemEltTyInt16:
3743 return Builder.getInt16Ty();
3744 case SVETypeFlags::MemEltTyInt32:
3745 return Builder.getInt32Ty();
3746 case SVETypeFlags::MemEltTyInt64:
3747 return Builder.getInt64Ty();
3748 }
3749 llvm_unreachable("Unknown MemEltType");
3750}
3751
3752llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3753 switch (TypeFlags.getEltType()) {
3754 default:
3755 llvm_unreachable("Invalid SVETypeFlag!");
3756
3757 case SVETypeFlags::EltTyMFloat8:
3758 case SVETypeFlags::EltTyInt8:
3759 return Builder.getInt8Ty();
3760 case SVETypeFlags::EltTyInt16:
3761 return Builder.getInt16Ty();
3762 case SVETypeFlags::EltTyInt32:
3763 return Builder.getInt32Ty();
3764 case SVETypeFlags::EltTyInt64:
3765 return Builder.getInt64Ty();
3766 case SVETypeFlags::EltTyInt128:
3767 return Builder.getInt128Ty();
3768
3769 case SVETypeFlags::EltTyFloat16:
3770 return Builder.getHalfTy();
3771 case SVETypeFlags::EltTyFloat32:
3772 return Builder.getFloatTy();
3773 case SVETypeFlags::EltTyFloat64:
3774 return Builder.getDoubleTy();
3775
3776 case SVETypeFlags::EltTyBFloat16:
3777 return Builder.getBFloatTy();
3778
3779 case SVETypeFlags::EltTyBool8:
3780 case SVETypeFlags::EltTyBool16:
3781 case SVETypeFlags::EltTyBool32:
3782 case SVETypeFlags::EltTyBool64:
3783 return Builder.getInt1Ty();
3784 }
3785}
3786
3787// Return the llvm predicate vector type corresponding to the specified element
3788// TypeFlags.
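// Predicates always use i1 elements; only the lane count varies, matching the
// number of data elements that fit in a 128-bit block.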
3789llvm::ScalableVectorType *
3790CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3791 switch (TypeFlags.getEltType()) {
3792 default: llvm_unreachable("Unhandled SVETypeFlag!");
3793
3794 case SVETypeFlags::EltTyInt8:
3795 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3796 case SVETypeFlags::EltTyInt16:
3797 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3798 case SVETypeFlags::EltTyInt32:
3799 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3800 case SVETypeFlags::EltTyInt64:
3801 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3802
3803 case SVETypeFlags::EltTyBFloat16:
3804 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3805 case SVETypeFlags::EltTyFloat16:
3806 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3807 case SVETypeFlags::EltTyFloat32:
3808 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3809 case SVETypeFlags::EltTyFloat64:
3810 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3811
3812 case SVETypeFlags::EltTyBool8:
3813 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3814 case SVETypeFlags::EltTyBool16:
3815 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3816 case SVETypeFlags::EltTyBool32:
3817 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3818 case SVETypeFlags::EltTyBool64:
3819 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3820 }
3821}
3822
3823// Return the llvm vector type corresponding to the specified element TypeFlags.
3824llvm::ScalableVectorType *
3825CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3826 switch (TypeFlags.getEltType()) {
3827 default:
3828 llvm_unreachable("Invalid SVETypeFlag!");
3829
3830 case SVETypeFlags::EltTyInt8:
3831 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3832 case SVETypeFlags::EltTyInt16:
3833 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3834 case SVETypeFlags::EltTyInt32:
3835 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3836 case SVETypeFlags::EltTyInt64:
3837 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3838
3839 case SVETypeFlags::EltTyMFloat8:
3840 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3841 case SVETypeFlags::EltTyFloat16:
3842 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3843 case SVETypeFlags::EltTyBFloat16:
3844 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3845 case SVETypeFlags::EltTyFloat32:
3846 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3847 case SVETypeFlags::EltTyFloat64:
3848 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3849
3850 case SVETypeFlags::EltTyBool8:
3851 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3852 case SVETypeFlags::EltTyBool16:
3853 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3854 case SVETypeFlags::EltTyBool32:
3855 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3856 case SVETypeFlags::EltTyBool64:
3857 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3858 }
3859}
3860
3861llvm::Value *
3862CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3863 Function *Ptrue =
3864 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3865 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3866}
3867
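// The SVE register size is a multiple of 128 bits; the element counts used for
// the scalable types below describe a single 128-bit block, which vscale then
// scales up.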
3868constexpr unsigned SVEBitsPerBlock = 128;
3869
3870static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3871 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3872 return llvm::ScalableVectorType::get(EltTy, NumElts);
3873}
3874
3875// Reinterpret the input predicate so that it can be used to correctly isolate
3876// the elements of the specified datatype.
3877Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3878                                             llvm::ScalableVectorType *VTy) {
3879
3880 if (isa<TargetExtType>(Pred->getType()) &&
3881 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3882 return Pred;
3883
3884 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3885 if (Pred->getType() == RTy)
3886 return Pred;
3887
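 // Choose the conversion direction: a predicate narrower than 16 lanes is
 // produced from svbool_t with convert_from_svbool, while a 16-lane target
 // means the (narrower) incoming predicate must be widened with
 // convert_to_svbool.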
3888 unsigned IntID;
3889 llvm::Type *IntrinsicTy;
3890 switch (VTy->getMinNumElements()) {
3891 default:
3892 llvm_unreachable("unsupported element count!");
3893 case 1:
3894 case 2:
3895 case 4:
3896 case 8:
3897 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3898 IntrinsicTy = RTy;
3899 break;
3900 case 16:
3901 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3902 IntrinsicTy = Pred->getType();
3903 break;
3904 }
3905
3906 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3907 Value *C = Builder.CreateCall(F, Pred);
3908 assert(C->getType() == RTy && "Unexpected return type!");
3909 return C;
3910}
3911
3912Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3913                                                  llvm::StructType *Ty) {
3914 if (PredTuple->getType() == Ty)
3915 return PredTuple;
3916
3917 Value *Ret = llvm::PoisonValue::get(Ty);
3918 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3919 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3920 Pred = EmitSVEPredicateCast(
3921 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3922 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3923 }
3924
3925 return Ret;
3926}
3927
3928Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3929                                          SmallVectorImpl<Value *> &Ops,
3930                                          unsigned IntID) {
3931 auto *ResultTy = getSVEType(TypeFlags);
3932 auto *OverloadedTy =
3933 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3934
3935 Function *F = nullptr;
3936 if (Ops[1]->getType()->isVectorTy())
3937 // This is the "vector base, scalar offset" case. In order to uniquely
3938 // map this built-in to an LLVM IR intrinsic, we need both the return type
3939 // and the type of the vector base.
3940 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3941 else
3942 // This is the "scalar base, vector offset" case. The type of the offset
3943 // is encoded in the name of the intrinsic. We only need to specify the
3944 // return type in order to uniquely map this built-in to an LLVM IR
3945 // intrinsic.
3946 F = CGM.getIntrinsic(IntID, OverloadedTy);
3947
3948 // At the ACLE level there's only one predicate type, svbool_t, which is
3949 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3950 // actual type being loaded. For example, when loading doubles (i64) the
3951 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3952 // the predicate and the data being loaded must match. Cast to the type
3953 // expected by the intrinsic. The intrinsic itself should be defined in
3954 // a way that enforces relations between parameter types.
3955 Ops[0] = EmitSVEPredicateCast(
3956 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3957
3958 // Pass 0 when the offset is missing. This can only be applied when using
3959 // the "vector base" addressing mode for which ACLE allows no offset. The
3960 // corresponding LLVM IR always requires an offset.
3961 if (Ops.size() == 2) {
3962 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3963 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3964 }
3965
3966 // For "vector base, scalar index" scale the index so that it becomes a
3967 // scalar offset.
3968 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3969 unsigned BytesPerElt =
3970 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3971 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3972 }
3973
3974 Value *Call = Builder.CreateCall(F, Ops);
3975
3976 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3977 // other cases it's folded into a nop.
3978 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3979 : Builder.CreateSExt(Call, ResultTy);
3980}
3981
3982Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3983                                            SmallVectorImpl<Value *> &Ops,
3984                                            unsigned IntID) {
3985 auto *SrcDataTy = getSVEType(TypeFlags);
3986 auto *OverloadedTy =
3987 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3988
3989 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3990 // it's the first argument. Move it accordingly.
3991 Ops.insert(Ops.begin(), Ops.pop_back_val());
3992
3993 Function *F = nullptr;
3994 if (Ops[2]->getType()->isVectorTy())
3995 // This is the "vector base, scalar offset" case. In order to uniquely
3996 // map this built-in to an LLVM IR intrinsic, we need both the return type
3997 // and the type of the vector base.
3998 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3999 else
4000 // This is the "scalar base, vector offset" case. The type of the offset
4001 // is encoded in the name of the intrinsic. We only need to specify the
4002 // return type in order to uniquely map this built-in to an LLVM IR
4003 // intrinsic.
4004 F = CGM.getIntrinsic(IntID, OverloadedTy);
4005
4006 // Pass 0 when the offset is missing. This can only be applied when using
4007 // the "vector base" addressing mode for which ACLE allows no offset. The
4008 // corresponding LLVM IR always requires an offset.
4009 if (Ops.size() == 3) {
4010 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4011 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4012 }
4013
4014 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4015 // folded into a nop.
4016 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4017
4018 // At the ACLE level there's only one predicate type, svbool_t, which is
4019 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4020 // actual type being stored. For example, when storing doubles (i64) the
4021 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4022 // the predicate and the data being stored must match. Cast to the type
4023 // expected by the intrinsic. The intrinsic itself should be defined in
4024 // a way that enforces relations between parameter types.
4025 Ops[1] = EmitSVEPredicateCast(
4026 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4027
4028 // For "vector base, scalar index" scale the index so that it becomes a
4029 // scalar offset.
4030 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4031 unsigned BytesPerElt =
4032 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4033 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4034 }
4035
4036 return Builder.CreateCall(F, Ops);
4037}
4038
4039Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4040                                              SmallVectorImpl<Value *> &Ops,
4041                                              unsigned IntID) {
4042 // The gather prefetches are overloaded on the vector input - this can either
4043 // be the vector of base addresses or vector of offsets.
4044 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4045 if (!OverloadedTy)
4046 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4047
4048 // Cast the predicate from svbool_t to the right number of elements.
4049 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
4050
4051 // vector + imm addressing modes
4052 if (Ops[1]->getType()->isVectorTy()) {
4053 if (Ops.size() == 3) {
4054 // Pass 0 for 'vector+imm' when the index is omitted.
4055 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4056
4057 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4058 std::swap(Ops[2], Ops[3]);
4059 } else {
4060 // Index needs to be passed as scaled offset.
4061 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4062 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4063 if (BytesPerElt > 1)
4064 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4065 }
4066 }
4067
4068 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
4069 return Builder.CreateCall(F, Ops);
4070}
4071
4072Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4073                                          SmallVectorImpl<Value *> &Ops,
4074                                          unsigned IntID) {
4075 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4076 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4077 Value *BasePtr = Ops[1];
4078
4079 // Does the load have an offset?
4080 if (Ops.size() > 2)
4081 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4082
4083 Function *F = CGM.getIntrinsic(IntID, {VTy});
4084 return Builder.CreateCall(F, {Predicate, BasePtr});
4085}
4086
4087Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4088                                           SmallVectorImpl<Value *> &Ops,
4089                                           unsigned IntID) {
4090 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4091
4092 unsigned N;
4093 switch (IntID) {
4094 case Intrinsic::aarch64_sve_st2:
4095 case Intrinsic::aarch64_sve_st1_pn_x2:
4096 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4097 case Intrinsic::aarch64_sve_st2q:
4098 N = 2;
4099 break;
4100 case Intrinsic::aarch64_sve_st3:
4101 case Intrinsic::aarch64_sve_st3q:
4102 N = 3;
4103 break;
4104 case Intrinsic::aarch64_sve_st4:
4105 case Intrinsic::aarch64_sve_st1_pn_x4:
4106 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4107 case Intrinsic::aarch64_sve_st4q:
4108 N = 4;
4109 break;
4110 default:
4111 llvm_unreachable("unknown intrinsic!");
4112 }
4113
4114 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4115 Value *BasePtr = Ops[1];
4116
4117 // Does the store have an offset?
4118 if (Ops.size() > (2 + N))
4119 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4120
4121 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4122 // need to break up the tuple vector.
4123 SmallVector<llvm::Value*, 5> Operands;
4124 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4125 Operands.push_back(Ops[I]);
4126 Operands.append({Predicate, BasePtr});
4127 Function *F = CGM.getIntrinsic(IntID, { VTy });
4128
4129 return Builder.CreateCall(F, Operands);
4130}
4131
4132// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4133// svpmullt_pair intrinsics, with the exception that their results are bitcast
4134// to a wider type.
4135Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4136                                     SmallVectorImpl<Value *> &Ops,
4137                                     unsigned BuiltinID) {
4138 // Splat scalar operand to vector (intrinsics with _n infix)
4139 if (TypeFlags.hasSplatOperand()) {
4140 unsigned OpNo = TypeFlags.getSplatOperand();
4141 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4142 }
4143
4144 // The pair-wise function has a narrower overloaded type.
4145 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
4146 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
4147
4148 // Now bitcast to the wider result type.
4149 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4150 return EmitSVEReinterpret(Call, Ty);
4151}
4152
4153Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4154                                    ArrayRef<Value *> Ops, unsigned BuiltinID) {
4155 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4156 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
4157 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
4158}
4159
4160Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4161                                            SmallVectorImpl<Value *> &Ops,
4162                                            unsigned BuiltinID) {
4163 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4164 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
4165 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4166
4167 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
4168 Value *BasePtr = Ops[1];
4169
4170 // Implement the index operand if not omitted.
4171 if (Ops.size() > 3)
4172 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4173
4174 Value *PrfOp = Ops.back();
4175
4176 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
4177 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4178}
4179
4180Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4181                                          llvm::Type *ReturnTy,
4182                                          SmallVectorImpl<Value *> &Ops,
4183                                          unsigned IntrinsicID,
4184 bool IsZExtReturn) {
4185 QualType LangPTy = E->getArg(1)->getType();
4186 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4187 LangPTy->castAs<PointerType>()->getPointeeType());
4188
4189 // Mfloat8 types are stored as a vector, so extra work is needed
4190 // to extract the scalar element type.
4191 if (MemEltTy->isVectorTy()) {
4192 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4193 "Only <1 x i8> expected");
4194 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4195 }
4196
4197 // The vector type that is returned may be different from the
4198 // eventual type loaded from memory.
4199 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4200 llvm::ScalableVectorType *MemoryTy = nullptr;
4201 llvm::ScalableVectorType *PredTy = nullptr;
4202 bool IsQuadLoad = false;
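 // The quad-word loads (ld1uwq/ld1udq) use one memory element and one
 // predicate lane per 128-bit quadword, hence the single-element scalable
 // types below.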
4203 switch (IntrinsicID) {
4204 case Intrinsic::aarch64_sve_ld1uwq:
4205 case Intrinsic::aarch64_sve_ld1udq:
4206 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4207 PredTy = llvm::ScalableVectorType::get(
4208 llvm::Type::getInt1Ty(getLLVMContext()), 1);
4209 IsQuadLoad = true;
4210 break;
4211 default:
4212 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4213 PredTy = MemoryTy;
4214 break;
4215 }
4216
4217 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4218 Value *BasePtr = Ops[1];
4219
4220 // Does the load have an offset?
4221 if (Ops.size() > 2)
4222 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4223
4224 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
4225 auto *Load =
4226 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4227 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4228 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
4229
4230 if (IsQuadLoad)
4231 return Load;
4232
4233 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4234 : Builder.CreateSExt(Load, VectorTy);
4235}
4236
4237Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4238                                           SmallVectorImpl<Value *> &Ops,
4239                                           unsigned IntrinsicID) {
4240 QualType LangPTy = E->getArg(1)->getType();
4241 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4242 LangPTy->castAs<PointerType>()->getPointeeType());
4243
4244 // Mfloat8 types are stored as a vector, so extra work is needed
4245 // to extract the scalar element type.
4246 if (MemEltTy->isVectorTy()) {
4247 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4248 "Only <1 x i8> expected");
4249 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4250 }
4251
4252 // The vector type that is stored may be different from the
4253 // eventual type stored to memory.
4254 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4255 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4256
4257 auto PredTy = MemoryTy;
4258 auto AddrMemoryTy = MemoryTy;
4259 bool IsQuadStore = false;
4260
4261 switch (IntrinsicID) {
4262 case Intrinsic::aarch64_sve_st1wq:
4263 case Intrinsic::aarch64_sve_st1dq:
4264 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4265 PredTy =
4266 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4267 IsQuadStore = true;
4268 break;
4269 default:
4270 break;
4271 }
4272 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4273 Value *BasePtr = Ops[1];
4274
4275 // Does the store have an offset?
4276 if (Ops.size() == 4)
4277 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
4278
4279 // Last value is always the data
4280 Value *Val =
4281 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4282
4283 Function *F =
4284 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
4285 auto *Store =
4286 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4287 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4288 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
4289 return Store;
4290}
4291
4292Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4293                                      SmallVectorImpl<Value *> &Ops,
4294                                      unsigned IntID) {
4295 Ops[2] = EmitSVEPredicateCast(
4297
4298 SmallVector<Value *> NewOps;
4299 NewOps.push_back(Ops[2]);
4300
4301 llvm::Value *BasePtr = Ops[3];
4302 llvm::Value *RealSlice = Ops[1];
4303 // If the intrinsic contains the vnum parameter, multiply it with the vector
4304 // size in bytes.
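 // aarch64_sme_cntsd returns the number of 64-bit elements in a streaming
 // vector, so multiplying by 8 yields the streaming vector length in bytes.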
4305 if (Ops.size() == 5) {
4306 Function *StreamingVectorLength =
4307 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
4308 llvm::Value *StreamingVectorLengthCall =
4309 Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4310 llvm::ConstantInt::get(Int64Ty, 8), "svl",
4311 /* HasNUW */ true, /* HasNSW */ true);
4312 llvm::Value *Mulvl =
4313 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4314 // The type of the ptr parameter is void *, so use Int8Ty here.
4315 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
4316 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
4317 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4318 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
4319 }
4320 NewOps.push_back(BasePtr);
4321 NewOps.push_back(Ops[0]);
4322 NewOps.push_back(RealSlice);
4323 Function *F = CGM.getIntrinsic(IntID);
4324 return Builder.CreateCall(F, NewOps);
4325}
4326
4327Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4328                                         SmallVectorImpl<Value *> &Ops,
4329                                         unsigned IntID) {
4330 auto *VecTy = getSVEType(TypeFlags);
4331 Function *F = CGM.getIntrinsic(IntID, VecTy);
4332 if (TypeFlags.isReadZA())
4333 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
4334 else if (TypeFlags.isWriteZA())
4335 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
4336 return Builder.CreateCall(F, Ops);
4337}
4338
4339Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4340                                    SmallVectorImpl<Value *> &Ops,
4341                                    unsigned IntID) {
4342 // svzero_za() intrinsic zeros the entire za tile and has no parameters.
4343 if (Ops.size() == 0)
4344 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4345 Function *F = CGM.getIntrinsic(IntID, {});
4346 return Builder.CreateCall(F, Ops);
4347}
4348
4349Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4350                                      SmallVectorImpl<Value *> &Ops,
4351                                      unsigned IntID) {
4352 if (Ops.size() == 2)
4353 Ops.push_back(Builder.getInt32(0));
4354 else
4355 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
4356 Function *F = CGM.getIntrinsic(IntID, {});
4357 return Builder.CreateCall(F, Ops);
4358}
4359
4360// Limit the usage of scalable llvm IR generated by the ACLE by using the
4361// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
4362Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4363 return Builder.CreateVectorSplat(
4364 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4365}
4366
4367Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4368 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
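 // An mfloat8 scalar is represented as a <1 x i8> vector; extract the single
 // i8 element before splatting.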
4369#ifndef NDEBUG
4370 auto *VecTy = cast<llvm::VectorType>(Ty);
4371 ElementCount EC = VecTy->getElementCount();
4372 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4373 "Only <1 x i8> expected");
4374#endif
4375 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4376 }
4377 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
4378}
4379
4380Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4381 // FIXME: For big endian this needs an additional REV, or needs a separate
4382 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4383 // instruction is defined as 'bitwise' equivalent from memory point of
4384 // view (when storing/reloading), whereas the svreinterpret builtin
4385 // implements bitwise equivalent cast from register point of view.
4386 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4387
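 // Tuples of vectors are reinterpreted member-wise: each element of the
 // struct is bitcast individually and the results are reassembled.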
4388 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4389 Value *Tuple = llvm::PoisonValue::get(Ty);
4390
4391 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4392 Value *In = Builder.CreateExtractValue(Val, I);
4393 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4394 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4395 }
4396
4397 return Tuple;
4398 }
4399
4400 return Builder.CreateBitCast(Val, Ty);
4401}
4402
4403static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4404                                      SmallVectorImpl<Value *> &Ops) {
4405 auto *SplatZero = Constant::getNullValue(Ty);
4406 Ops.insert(Ops.begin(), SplatZero);
4407}
4408
4409static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4410                                       SmallVectorImpl<Value *> &Ops) {
4411 auto *SplatUndef = UndefValue::get(Ty);
4412 Ops.insert(Ops.begin(), SplatUndef);
4413}
4414
4415SmallVector<llvm::Type *, 2>
4416CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4417                                     llvm::Type *ResultType,
4418 ArrayRef<Value *> Ops) {
4419 if (TypeFlags.isOverloadNone())
4420 return {};
4421
4422 llvm::Type *DefaultType = getSVEType(TypeFlags);
4423
4424 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4425 return {DefaultType, Ops[1]->getType()};
4426
4427 if (TypeFlags.isOverloadWhileRW())
4428 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4429
4430 if (TypeFlags.isOverloadCvt())
4431 return {Ops[0]->getType(), Ops.back()->getType()};
4432
4433 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4434 ResultType->isVectorTy())
4435 return {ResultType, Ops[1]->getType()};
4436
4437 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4438 return {DefaultType};
4439}
4440
4441Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4442                                             ArrayRef<Value *> Ops) {
4443 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4444 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4445 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4446
4447 if (TypeFlags.isTupleSet())
4448 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4449 return Builder.CreateExtractValue(Ops[0], Idx);
4450}
4451
4452Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4453                                           llvm::Type *Ty,
4454 ArrayRef<Value *> Ops) {
4455 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4456
4457 Value *Tuple = llvm::PoisonValue::get(Ty);
4458 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4459 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4460
4461 return Tuple;
4462}
4463
4464void CodeGenFunction::GetAArch64SVEProcessedOperands(
4465    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4466 SVETypeFlags TypeFlags) {
4467 // Find out if any arguments are required to be integer constant expressions.
4468 unsigned ICEArguments = 0;
4469 ASTContext::GetBuiltinTypeError Error;
4470 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4471 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4472
4473 // Tuple set/get only requires one insert/extract vector, which is
4474 // created by EmitSVETupleSetOrGet.
4475 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4476
4477 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4478 bool IsICE = ICEArguments & (1 << i);
4479 Value *Arg = EmitScalarExpr(E->getArg(i));
4480
4481 if (IsICE) {
4482 // If this is required to be a constant, constant fold it so that we know
4483 // that the generated intrinsic gets a ConstantInt.
4484 std::optional<llvm::APSInt> Result =
4485          E->getArg(i)->getIntegerConstantExpr(getContext());
4486 assert(Result && "Expected argument to be a constant");
4487
4488 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4489 // truncate because the immediate has been range checked and no valid
4490 // immediate requires more than a handful of bits.
4491 *Result = Result->extOrTrunc(32);
4492 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4493 continue;
4494 }
4495
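 // Multi-vector (tuple) arguments are passed as LLVM struct values; flatten
 // them into their member vectors, except for tuple get/set which operate on
 // the struct value directly.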
4496 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4497 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4498 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4499
4500 continue;
4501 }
4502
4503 Ops.push_back(Arg);
4504 }
4505}
4506
4507Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4508                                                  const CallExpr *E) {
4509 llvm::Type *Ty = ConvertType(E->getType());
4510 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4511 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4512 Value *Val = EmitScalarExpr(E->getArg(0));
4513 return EmitSVEReinterpret(Val, Ty);
4514 }
4515
4516 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4517                                             AArch64SVEIntrinsicsProvenSorted);
4518
4519 llvm::SmallVector<Value *, 4> Ops;
4520 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4521 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4522
4523 if (TypeFlags.isLoad())
4524 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4525 TypeFlags.isZExtReturn());
4526 else if (TypeFlags.isStore())
4527 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4528 else if (TypeFlags.isGatherLoad())
4529 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4530 else if (TypeFlags.isScatterStore())
4531 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4532 else if (TypeFlags.isPrefetch())
4533 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4534 else if (TypeFlags.isGatherPrefetch())
4535 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4536 else if (TypeFlags.isStructLoad())
4537 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4538 else if (TypeFlags.isStructStore())
4539 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4540 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4541 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4542 else if (TypeFlags.isTupleCreate())
4543 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4544 else if (TypeFlags.isUndef())
4545 return UndefValue::get(Ty);
4546 else if (Builtin->LLVMIntrinsic != 0) {
4547 // Emit set FPMR for intrinsics that require it
4548 if (TypeFlags.setsFPMR())
4549 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4550 Ops.pop_back_val());
4551 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4552 InsertExplicitZeroOperand(Builder, Ty, Ops);
4553
4554 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4555 InsertExplicitUndefOperand(Builder, Ty, Ops);
4556
4557 // Some ACLE builtins leave out the argument to specify the predicate
4558 // pattern, which is expected to be expanded to an SV_ALL pattern.
4559 if (TypeFlags.isAppendSVALL())
4560 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4561 if (TypeFlags.isInsertOp1SVALL())
4562 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4563
4564 // Predicates must match the main datatype.
4565 for (Value *&Op : Ops)
4566 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4567 if (PredTy->getElementType()->isIntegerTy(1))
4568 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4569
4570 // Splat scalar operand to vector (intrinsics with _n infix)
4571 if (TypeFlags.hasSplatOperand()) {
4572 unsigned OpNo = TypeFlags.getSplatOperand();
4573 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4574 }
4575
4576 if (TypeFlags.isReverseCompare())
4577 std::swap(Ops[1], Ops[2]);
4578 else if (TypeFlags.isReverseUSDOT())
4579 std::swap(Ops[1], Ops[2]);
4580 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4581 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4582 std::swap(Ops[1], Ops[2]);
4583 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4584 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4585 std::swap(Ops[1], Ops[3]);
4586
4587 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4588 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4589 llvm::Type *OpndTy = Ops[1]->getType();
4590 auto *SplatZero = Constant::getNullValue(OpndTy);
4591 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4592 }
4593
4594 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4595 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4596 Value *Call = Builder.CreateCall(F, Ops);
4597
4598 if (Call->getType() == Ty)
4599 return Call;
4600
4601 // Predicate results must be converted to svbool_t.
4602 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4603 return EmitSVEPredicateCast(Call, PredTy);
4604 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4605 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4606
4607 llvm_unreachable("unsupported element count!");
4608 }
4609
4610 switch (BuiltinID) {
4611 default:
4612 return nullptr;
4613
4614 case SVE::BI__builtin_sve_svreinterpret_b: {
4615 auto SVCountTy =
4616 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4617 Function *CastFromSVCountF =
4618 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4619 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4620 }
4621 case SVE::BI__builtin_sve_svreinterpret_c: {
4622 auto SVCountTy =
4623 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4624 Function *CastToSVCountF =
4625 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4626 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4627 }
4628
4629 case SVE::BI__builtin_sve_svpsel_lane_b8:
4630 case SVE::BI__builtin_sve_svpsel_lane_b16:
4631 case SVE::BI__builtin_sve_svpsel_lane_b32:
4632 case SVE::BI__builtin_sve_svpsel_lane_b64:
4633 case SVE::BI__builtin_sve_svpsel_lane_c8:
4634 case SVE::BI__builtin_sve_svpsel_lane_c16:
4635 case SVE::BI__builtin_sve_svpsel_lane_c32:
4636 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4637 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4638 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4639 "aarch64.svcount")) &&
4640 "Unexpected TargetExtType");
4641 auto SVCountTy =
4642 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4643 Function *CastFromSVCountF =
4644 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4645 Function *CastToSVCountF =
4646 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4647
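 // Predicate-as-counter (svcount) operands and results are bridged to and
 // from svbool_t with the convert intrinsics so the same psel intrinsic
 // serves both the _b and _c builtins.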
4648 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4649 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4650 llvm::Value *Ops0 =
4651 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4652 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4653 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4654 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4655 }
4656 case SVE::BI__builtin_sve_svmov_b_z: {
4657 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4658 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4659 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4660 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4661 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4662 }
4663
4664 case SVE::BI__builtin_sve_svnot_b_z: {
4665 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4666 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4667 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4668 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4669 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4670 }
4671
4672 case SVE::BI__builtin_sve_svmovlb_u16:
4673 case SVE::BI__builtin_sve_svmovlb_u32:
4674 case SVE::BI__builtin_sve_svmovlb_u64:
4675 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4676
4677 case SVE::BI__builtin_sve_svmovlb_s16:
4678 case SVE::BI__builtin_sve_svmovlb_s32:
4679 case SVE::BI__builtin_sve_svmovlb_s64:
4680 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4681
4682 case SVE::BI__builtin_sve_svmovlt_u16:
4683 case SVE::BI__builtin_sve_svmovlt_u32:
4684 case SVE::BI__builtin_sve_svmovlt_u64:
4685 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4686
4687 case SVE::BI__builtin_sve_svmovlt_s16:
4688 case SVE::BI__builtin_sve_svmovlt_s32:
4689 case SVE::BI__builtin_sve_svmovlt_s64:
4690 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4691
4692 case SVE::BI__builtin_sve_svpmullt_u16:
4693 case SVE::BI__builtin_sve_svpmullt_u64:
4694 case SVE::BI__builtin_sve_svpmullt_n_u16:
4695 case SVE::BI__builtin_sve_svpmullt_n_u64:
4696 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4697
4698 case SVE::BI__builtin_sve_svpmullb_u16:
4699 case SVE::BI__builtin_sve_svpmullb_u64:
4700 case SVE::BI__builtin_sve_svpmullb_n_u16:
4701 case SVE::BI__builtin_sve_svpmullb_n_u64:
4702 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4703
4704 case SVE::BI__builtin_sve_svdup_n_b8:
4705 case SVE::BI__builtin_sve_svdup_n_b16:
4706 case SVE::BI__builtin_sve_svdup_n_b32:
4707 case SVE::BI__builtin_sve_svdup_n_b64: {
4708 Value *CmpNE =
4709 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4710 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4711 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4712 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4713 }
4714
4715 case SVE::BI__builtin_sve_svdupq_n_b8:
4716 case SVE::BI__builtin_sve_svdupq_n_b16:
4717 case SVE::BI__builtin_sve_svdupq_n_b32:
4718 case SVE::BI__builtin_sve_svdupq_n_b64:
4719 case SVE::BI__builtin_sve_svdupq_n_u8:
4720 case SVE::BI__builtin_sve_svdupq_n_s8:
4721 case SVE::BI__builtin_sve_svdupq_n_u64:
4722 case SVE::BI__builtin_sve_svdupq_n_f64:
4723 case SVE::BI__builtin_sve_svdupq_n_s64:
4724 case SVE::BI__builtin_sve_svdupq_n_u16:
4725 case SVE::BI__builtin_sve_svdupq_n_f16:
4726 case SVE::BI__builtin_sve_svdupq_n_bf16:
4727 case SVE::BI__builtin_sve_svdupq_n_s16:
4728 case SVE::BI__builtin_sve_svdupq_n_u32:
4729 case SVE::BI__builtin_sve_svdupq_n_f32:
4730 case SVE::BI__builtin_sve_svdupq_n_s32: {
4731 // These builtins are implemented by building a vector from the scalar operands
4732 // and replicating it across the register with dupq_lane.
4733 unsigned NumOpnds = Ops.size();
4734
4735 bool IsBoolTy =
4736 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4737
4738 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4739 // so that the compare can use the width that is natural for the expected
4740 // number of predicate lanes.
4741 llvm::Type *EltTy = Ops[0]->getType();
4742 if (IsBoolTy)
4743 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4744
4745 SmallVector<llvm::Value *, 16> VecOps;
4746 for (unsigned I = 0; I < NumOpnds; ++I)
4747 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4748 Value *Vec = BuildVector(VecOps);
4749
4750 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4751 Value *InsertSubVec = Builder.CreateInsertVector(
4752 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4753
4754 Function *F =
4755 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4756 Value *DupQLane =
4757 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4758
4759 if (!IsBoolTy)
4760 return DupQLane;
4761
4762 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4763 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4764
4765 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4766 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4767 : Intrinsic::aarch64_sve_cmpne_wide,
4768 OverloadedTy);
4769 Value *Call = Builder.CreateCall(
4770 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4771 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4772 }
4773
4774 case SVE::BI__builtin_sve_svpfalse_b:
4775 return ConstantInt::getFalse(Ty);
4776
4777 case SVE::BI__builtin_sve_svpfalse_c: {
4778 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4779 Function *CastToSVCountF =
4780 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4781 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4782 }
4783
4784 case SVE::BI__builtin_sve_svlen_bf16:
4785 case SVE::BI__builtin_sve_svlen_f16:
4786 case SVE::BI__builtin_sve_svlen_f32:
4787 case SVE::BI__builtin_sve_svlen_f64:
4788 case SVE::BI__builtin_sve_svlen_s8:
4789 case SVE::BI__builtin_sve_svlen_s16:
4790 case SVE::BI__builtin_sve_svlen_s32:
4791 case SVE::BI__builtin_sve_svlen_s64:
4792 case SVE::BI__builtin_sve_svlen_u8:
4793 case SVE::BI__builtin_sve_svlen_u16:
4794 case SVE::BI__builtin_sve_svlen_u32:
4795 case SVE::BI__builtin_sve_svlen_u64: {
4796 SVETypeFlags TF(Builtin->TypeModifier);
4797 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4798 }
4799
4800 case SVE::BI__builtin_sve_svtbl2_u8:
4801 case SVE::BI__builtin_sve_svtbl2_s8:
4802 case SVE::BI__builtin_sve_svtbl2_u16:
4803 case SVE::BI__builtin_sve_svtbl2_s16:
4804 case SVE::BI__builtin_sve_svtbl2_u32:
4805 case SVE::BI__builtin_sve_svtbl2_s32:
4806 case SVE::BI__builtin_sve_svtbl2_u64:
4807 case SVE::BI__builtin_sve_svtbl2_s64:
4808 case SVE::BI__builtin_sve_svtbl2_f16:
4809 case SVE::BI__builtin_sve_svtbl2_bf16:
4810 case SVE::BI__builtin_sve_svtbl2_f32:
4811 case SVE::BI__builtin_sve_svtbl2_f64: {
4812 SVETypeFlags TF(Builtin->TypeModifier);
4813 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4814 return Builder.CreateCall(F, Ops);
4815 }
4816
4817 case SVE::BI__builtin_sve_svset_neonq_s8:
4818 case SVE::BI__builtin_sve_svset_neonq_s16:
4819 case SVE::BI__builtin_sve_svset_neonq_s32:
4820 case SVE::BI__builtin_sve_svset_neonq_s64:
4821 case SVE::BI__builtin_sve_svset_neonq_u8:
4822 case SVE::BI__builtin_sve_svset_neonq_u16:
4823 case SVE::BI__builtin_sve_svset_neonq_u32:
4824 case SVE::BI__builtin_sve_svset_neonq_u64:
4825 case SVE::BI__builtin_sve_svset_neonq_f16:
4826 case SVE::BI__builtin_sve_svset_neonq_f32:
4827 case SVE::BI__builtin_sve_svset_neonq_f64:
4828 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4829 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4830 }
4831
4832 case SVE::BI__builtin_sve_svget_neonq_s8:
4833 case SVE::BI__builtin_sve_svget_neonq_s16:
4834 case SVE::BI__builtin_sve_svget_neonq_s32:
4835 case SVE::BI__builtin_sve_svget_neonq_s64:
4836 case SVE::BI__builtin_sve_svget_neonq_u8:
4837 case SVE::BI__builtin_sve_svget_neonq_u16:
4838 case SVE::BI__builtin_sve_svget_neonq_u32:
4839 case SVE::BI__builtin_sve_svget_neonq_u64:
4840 case SVE::BI__builtin_sve_svget_neonq_f16:
4841 case SVE::BI__builtin_sve_svget_neonq_f32:
4842 case SVE::BI__builtin_sve_svget_neonq_f64:
4843 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4844 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4845 }
4846
4847 case SVE::BI__builtin_sve_svdup_neonq_s8:
4848 case SVE::BI__builtin_sve_svdup_neonq_s16:
4849 case SVE::BI__builtin_sve_svdup_neonq_s32:
4850 case SVE::BI__builtin_sve_svdup_neonq_s64:
4851 case SVE::BI__builtin_sve_svdup_neonq_u8:
4852 case SVE::BI__builtin_sve_svdup_neonq_u16:
4853 case SVE::BI__builtin_sve_svdup_neonq_u32:
4854 case SVE::BI__builtin_sve_svdup_neonq_u64:
4855 case SVE::BI__builtin_sve_svdup_neonq_f16:
4856 case SVE::BI__builtin_sve_svdup_neonq_f32:
4857 case SVE::BI__builtin_sve_svdup_neonq_f64:
4858 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4859 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4860 uint64_t(0));
4861 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4862 {Insert, Builder.getInt64(0)});
4863 }
4864 }
4865
4866 /// Should not happen
4867 return nullptr;
4868}
4869
4870static void swapCommutativeSMEOperands(unsigned BuiltinID,
4872 unsigned MultiVec;
4873 switch (BuiltinID) {
4874 default:
4875 return;
4876 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4877 MultiVec = 1;
4878 break;
4879 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4880 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4881 MultiVec = 2;
4882 break;
4883 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4884 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4885 MultiVec = 4;
4886 break;
4887 }
4888
4889 if (MultiVec > 0)
4890 for (unsigned I = 0; I < MultiVec; ++I)
4891 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4892}
4893
4894Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4895                                                  const CallExpr *E) {
4896 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
4897                                             AArch64SMEIntrinsicsProvenSorted);
4898
4899 llvm::SmallVector<Value *, 4> Ops;
4900 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4901 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4902
4903 if (TypeFlags.isLoad() || TypeFlags.isStore())
4904 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4905 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4906 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4907 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4908 BuiltinID == SME::BI__builtin_sme_svzero_za)
4909 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4910 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4911 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4912 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4913 BuiltinID == SME::BI__builtin_sme_svstr_za)
4914 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4915
4916 // Emit set FPMR for intrinsics that require it
4917 if (TypeFlags.setsFPMR())
4918 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4919 Ops.pop_back_val());
4920 // Handle builtins which require their multi-vector operands to be swapped
4921 swapCommutativeSMEOperands(BuiltinID, Ops);
4922
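 // svcntsb/svcntsh/svcntsw are computed from cntsd (the number of 64-bit
 // elements in a streaming vector) scaled by 8, 4 and 2 respectively.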
4923 auto isCntsBuiltin = [&]() {
4924 switch (BuiltinID) {
4925 default:
4926 return 0;
4927 case SME::BI__builtin_sme_svcntsb:
4928 return 8;
4929 case SME::BI__builtin_sme_svcntsh:
4930 return 4;
4931 case SME::BI__builtin_sme_svcntsw:
4932 return 2;
4933 }
4934 };
4935
4936 if (auto Mul = isCntsBuiltin()) {
4937 llvm::Value *Cntd =
4938 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4939 return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4940 "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4941 }
4942
4943 // Should not happen!
4944 if (Builtin->LLVMIntrinsic == 0)
4945 return nullptr;
4946
4947 // Predicates must match the main datatype.
4948 for (Value *&Op : Ops)
4949 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4950 if (PredTy->getElementType()->isIntegerTy(1))
4951 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4952
4953 Function *F =
4954 TypeFlags.isOverloadNone()
4955 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4956 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4957
4958 return Builder.CreateCall(F, Ops);
4959}
4960
4961/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4962/// return it as an i8 pointer.
4964 LLVMContext &Context = CGF.CGM.getLLVMContext();
4965 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4966 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4967 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4968 llvm::Function *F =
4969 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4970 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
4971 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4972}
4973
4974Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4975                                               const CallExpr *E,
4976 llvm::Triple::ArchType Arch) {
4977 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4978 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4979 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4980
4981 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4982 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4983 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4984
4985 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4986 return EmitAArch64CpuSupports(E);
4987
4988 unsigned HintID = static_cast<unsigned>(-1);
4989 switch (BuiltinID) {
4990 default: break;
4991 case clang::AArch64::BI__builtin_arm_nop:
4992 HintID = 0;
4993 break;
4994 case clang::AArch64::BI__builtin_arm_yield:
4995 case clang::AArch64::BI__yield:
4996 HintID = 1;
4997 break;
4998 case clang::AArch64::BI__builtin_arm_wfe:
4999 case clang::AArch64::BI__wfe:
5000 HintID = 2;
5001 break;
5002 case clang::AArch64::BI__builtin_arm_wfi:
5003 case clang::AArch64::BI__wfi:
5004 HintID = 3;
5005 break;
5006 case clang::AArch64::BI__builtin_arm_sev:
5007 case clang::AArch64::BI__sev:
5008 HintID = 4;
5009 break;
5010 case clang::AArch64::BI__builtin_arm_sevl:
5011 case clang::AArch64::BI__sevl:
5012 HintID = 5;
5013 break;
5014 }
5015
5016 if (HintID != static_cast<unsigned>(-1)) {
5017 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5018 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5019 }
5020
5021 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5022 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5023 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5024 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
5025 }
5026
5027 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5028 // Create call to __arm_sme_state and store the results to the two pointers.
5029 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
5030 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
5031 false),
5032 "__arm_sme_state"));
5033 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
5034 "aarch64_pstate_sm_compatible");
5035 CI->setAttributes(Attrs);
5036 CI->setCallingConv(
5037 llvm::CallingConv::
5038 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5039 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
5040                     EmitPointerWithAlignment(E->getArg(0)));
5041 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
5042                            EmitPointerWithAlignment(E->getArg(1)));
5043 }
5044
5045 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5046 assert((getContext().getTypeSize(E->getType()) == 32) &&
5047 "rbit of unusual size!");
5048 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5049 return Builder.CreateCall(
5050 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5051 }
5052 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5053 assert((getContext().getTypeSize(E->getType()) == 64) &&
5054 "rbit of unusual size!");
5055 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5056 return Builder.CreateCall(
5057 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5058 }
5059
5060 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5061 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5062 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5063 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5064 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5065 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5066 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
5067 return Res;
5068 }
5069
5070 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5071 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5072 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5073 "cls");
5074 }
5075 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5076 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5077 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5078 "cls");
5079 }
5080
5081 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5082 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5083 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5084 llvm::Type *Ty = Arg->getType();
5085 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5086 Arg, "frint32z");
5087 }
5088
5089 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5090 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5091 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5092 llvm::Type *Ty = Arg->getType();
5093 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5094 Arg, "frint64z");
5095 }
5096
5097 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5098 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5099 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5100 llvm::Type *Ty = Arg->getType();
5101 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5102 Arg, "frint32x");
5103 }
5104
5105 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5106 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5107 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5108 llvm::Type *Ty = Arg->getType();
5109 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5110 Arg, "frint64x");
5111 }
5112
5113 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5114 assert((getContext().getTypeSize(E->getType()) == 32) &&
5115 "__jcvt of unusual size!");
5116 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5117 return Builder.CreateCall(
5118 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5119 }
5120
5121 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5122 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5123 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5124 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5125 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
5126 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
5127
5128 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5129 // Load from the address via an LLVM intrinsic, receiving a
5130 // tuple of 8 i64 words, and store each one to ValPtr.
5131 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5132 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
5133 llvm::Value *ToRet;
5134 for (size_t i = 0; i < 8; i++) {
5135 llvm::Value *ValOffsetPtr =
5136 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5137 Address Addr =
5138 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5139 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
5140 }
5141 return ToRet;
5142 } else {
5143 // Load 8 i64 words from ValPtr, and store them to the address
5144 // via an LLVM intrinsic.
5145 llvm::SmallVector<llvm::Value *, 9> Args;
5146 Args.push_back(MemAddr);
5147 for (size_t i = 0; i < 8; i++) {
5148 llvm::Value *ValOffsetPtr =
5149 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5150 Address Addr =
5151 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5152 Args.push_back(Builder.CreateLoad(Addr));
5153 }
5154
5155 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5156 ? Intrinsic::aarch64_st64b
5157 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5158 ? Intrinsic::aarch64_st64bv
5159 : Intrinsic::aarch64_st64bv0);
5160 Function *F = CGM.getIntrinsic(Intr);
5161 return Builder.CreateCall(F, Args);
5162 }
5163 }
5164
5165 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5166 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5167
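 // Both intrinsics return a {value, status} pair: the random value is stored
 // through the pointer argument and the status flag is zero-extended and
 // returned.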
5168 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5169 ? Intrinsic::aarch64_rndr
5170 : Intrinsic::aarch64_rndrrs);
5171 Function *F = CGM.getIntrinsic(Intr);
5172 llvm::Value *Val = Builder.CreateCall(F);
5173 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
5174 Value *Status = Builder.CreateExtractValue(Val, 1);
5175
5176 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
5177 Builder.CreateStore(RandomValue, MemAddress);
5178 Status = Builder.CreateZExt(Status, Int32Ty);
5179 return Status;
5180 }
5181
5182 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5183 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5184 const FunctionDecl *FD = E->getDirectCallee();
5185 Value *Ops[2];
5186 for (unsigned i = 0; i < 2; i++)
5187 Ops[i] = EmitScalarExpr(E->getArg(i));
5188 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5189 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5190 StringRef Name = FD->getName();
5191 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5192 }
5193
5194 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5195 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5196 getContext().getTypeSize(E->getType()) == 128) {
5197 Function *F =
5198 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5199 ? Intrinsic::aarch64_ldaxp
5200 : Intrinsic::aarch64_ldxp);
5201
5202 Value *LdPtr = EmitScalarExpr(E->getArg(0));
5203 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
5204
5205 Value *Val0 = Builder.CreateExtractValue(Val, 1);
5206 Value *Val1 = Builder.CreateExtractValue(Val, 0);
5207 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5208 Val0 = Builder.CreateZExt(Val0, Int128Ty);
5209 Val1 = Builder.CreateZExt(Val1, Int128Ty);
5210
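 // Reassemble the two 64-bit halves returned by ldxp/ldaxp into a single
 // i128 before bitcasting to the user-visible type.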
5211 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5212 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5213 Val = Builder.CreateOr(Val, Val1);
5214 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5215 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5216 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5217 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5218
5219 QualType Ty = E->getType();
5220 llvm::Type *RealResTy = ConvertType(Ty);
5221 llvm::Type *IntTy =
5222 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5223
5224 Function *F =
5225 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5226 ? Intrinsic::aarch64_ldaxr
5227 : Intrinsic::aarch64_ldxr,
5228 UnqualPtrTy);
5229 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5230 Val->addParamAttr(
5231 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5232
5233 if (RealResTy->isPointerTy())
5234 return Builder.CreateIntToPtr(Val, RealResTy);
5235
5236 llvm::Type *IntResTy = llvm::IntegerType::get(
5237 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5238 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
5239 RealResTy);
5240 }
5241
5242 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5243 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5244 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5245 Function *F =
5246 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5247 ? Intrinsic::aarch64_stlxp
5248 : Intrinsic::aarch64_stxp);
5249 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5250
5251 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5252 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5253
5254 Tmp = Tmp.withElementType(STy);
5255 llvm::Value *Val = Builder.CreateLoad(Tmp);
5256
5257 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5258 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5259 Value *StPtr = EmitScalarExpr(E->getArg(1));
5260 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5261 }
5262
5263 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5264 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5265 Value *StoreVal = EmitScalarExpr(E->getArg(0));
5266 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5267
5268 QualType Ty = E->getArg(0)->getType();
5269 llvm::Type *StoreTy =
5270 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5271
5272 if (StoreVal->getType()->isPointerTy())
5273 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5274 else {
5275 llvm::Type *IntTy = llvm::IntegerType::get(
5276 getLLVMContext(),
5277 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5278 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5279 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5280 }
5281
5282 Function *F =
5283 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5284 ? Intrinsic::aarch64_stlxr
5285 : Intrinsic::aarch64_stxr,
5286 StoreAddr->getType());
5287 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5288 CI->addParamAttr(
5289 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5290 return CI;
5291 }
5292
5293 if (BuiltinID == clang::AArch64::BI__getReg) {
5294 Expr::EvalResult Result;
5295 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5296 llvm_unreachable("Sema will ensure that the parameter is constant");
5297
5298 llvm::APSInt Value = Result.Val.getInt();
5299 LLVMContext &Context = CGM.getLLVMContext();
5300 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
5301
5302 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5303 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5304 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5305
5306 llvm::Function *F =
5307 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5308 return Builder.CreateCall(F, Metadata);
5309 }
5310
5311 if (BuiltinID == clang::AArch64::BI__break) {
5312 Expr::EvalResult Result;
5313 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5314 llvm_unreachable("Sema will ensure that the parameter is constant");
5315
5316 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5317 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5318 }
5319
5320 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5321 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5322 return Builder.CreateCall(F);
5323 }
5324
5325 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5326 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5327 llvm::SyncScope::SingleThread);
5328
5329 // CRC32
5330 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5331 switch (BuiltinID) {
5332 case clang::AArch64::BI__builtin_arm_crc32b:
5333 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5334 case clang::AArch64::BI__builtin_arm_crc32cb:
5335 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5336 case clang::AArch64::BI__builtin_arm_crc32h:
5337 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5338 case clang::AArch64::BI__builtin_arm_crc32ch:
5339 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5340 case clang::AArch64::BI__builtin_arm_crc32w:
5341 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5342 case clang::AArch64::BI__builtin_arm_crc32cw:
5343 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5344 case clang::AArch64::BI__builtin_arm_crc32d:
5345 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5346 case clang::AArch64::BI__builtin_arm_crc32cd:
5347 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5348 }
5349
5350 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5351 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5352 Value *Arg1 = EmitScalarExpr(E->getArg(1));
5353 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5354
5355 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5356 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5357
5358 return Builder.CreateCall(F, {Arg0, Arg1});
5359 }
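// For example, __builtin_arm_crc32b(crc, data) zero-extends the byte operand
// to the intrinsic's i32 parameter and lowers roughly to
//   call i32 @llvm.aarch64.crc32b(i32 %crc, i32 %data.ext)
// Note that the 64-bit crc32d/crc32cd builtins map to crc32x/crc32cx above.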
5360
5361 // Memory Operations (MOPS)
5362 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5363 Value *Dst = EmitScalarExpr(E->getArg(0));
5364 Value *Val = EmitScalarExpr(E->getArg(1));
5365 Value *Size = EmitScalarExpr(E->getArg(2));
5366 Val = Builder.CreateTrunc(Val, Int8Ty);
5367 Size = Builder.CreateIntCast(Size, Int64Ty, false);
5368 return Builder.CreateCall(
5369 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5370 }
5371
5372 // Memory Tagging Extensions (MTE) Intrinsics
5373 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5374 switch (BuiltinID) {
5375 case clang::AArch64::BI__builtin_arm_irg:
5376 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5377 case clang::AArch64::BI__builtin_arm_addg:
5378 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5379 case clang::AArch64::BI__builtin_arm_gmi:
5380 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5381 case clang::AArch64::BI__builtin_arm_ldg:
5382 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5383 case clang::AArch64::BI__builtin_arm_stg:
5384 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5385 case clang::AArch64::BI__builtin_arm_subp:
5386 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5387 }
5388
5389 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5390 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5391 Value *Pointer = EmitScalarExpr(E->getArg(0));
5392 Value *Mask = EmitScalarExpr(E->getArg(1));
5393
5394 Mask = Builder.CreateZExt(Mask, Int64Ty);
5395 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5396 {Pointer, Mask});
5397 }
5398 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5399 Value *Pointer = EmitScalarExpr(E->getArg(0));
5400 Value *TagOffset = EmitScalarExpr(E->getArg(1));
5401
5402 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
5403 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5404 {Pointer, TagOffset});
5405 }
5406 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5407 Value *Pointer = EmitScalarExpr(E->getArg(0));
5408 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
5409
5410 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
5411 return Builder.CreateCall(
5412 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5413 }
5414 // Although it is possible to supply a different return
5415 // address (first arg) to this intrinsic, for now we set the
5416 // return address to the same value as the input address.
5417 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5418 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5419 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5420 {TagAddress, TagAddress});
5421 }
5422 // Although it is possible to supply a different tag (to set)
5423 // to this intrinsic (as first arg), for now we supply the tag
5424 // that is already in the input address arg (the common use case).
5425 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5426 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5427 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5428 {TagAddress, TagAddress});
5429 }
5430 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5431 Value *PointerA = EmitScalarExpr(E->getArg(0));
5432 Value *PointerB = EmitScalarExpr(E->getArg(1));
5433 return Builder.CreateCall(
5434 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5435 }
5436 }
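// For example, __builtin_arm_irg(p, mask) zero-extends the mask to i64 and
// lowers roughly to
//   call ptr @llvm.aarch64.irg(ptr %p, i64 %mask.ext)
// and, as noted above, __builtin_arm_ldg/__builtin_arm_stg currently pass
// the input address as both operands of the underlying intrinsic.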
5437
5438 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5439 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5440 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5441 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5442 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5443 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5444 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5445 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5446
5447 SpecialRegisterAccessKind AccessKind = Write;
5448 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5449 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5450 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5451 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5452 AccessKind = VolatileRead;
5453
5454 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5455 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5456
5457 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5458 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5459
5460 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5461 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5462
5463 llvm::Type *ValueType;
5464 llvm::Type *RegisterType = Int64Ty;
5465 if (Is32Bit) {
5466 ValueType = Int32Ty;
5467 } else if (Is128Bit) {
5468 llvm::Type *Int128Ty =
5469 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5470 ValueType = Int128Ty;
5471 RegisterType = Int128Ty;
5472 } else if (IsPointerBuiltin) {
5473 ValueType = VoidPtrTy;
5474 } else {
5475 ValueType = Int64Ty;
5476 };
5477
5478 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5479 AccessKind);
5480 }
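// The value/register types chosen above select the IR overload: for example,
// __builtin_arm_rsr64("tpidr_el0") is expected to lower roughly to
//   call i64 @llvm.read_register.i64(metadata !"tpidr_el0")
// while the 128-bit variants use i128 and the rsrp/wsrp variants read or
// write a pointer-sized value (see EmitSpecialRegisterBuiltin).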
5481
5482 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5483 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5484 BuiltinID == clang::AArch64::BI__sys) {
5485 LLVMContext &Context = CGM.getLLVMContext();
5486
5487 unsigned SysReg =
5488 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5489
5490 std::string SysRegStr;
5491 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5492 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5493 ? ((1 << 1) | ((SysReg >> 14) & 1))
5494 : 1;
5495 llvm::raw_string_ostream(SysRegStr)
5496 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5497 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5498 << (SysReg & 7);
5499
5500 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5501 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5502 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5503
5504 llvm::Type *RegisterType = Int64Ty;
5505 llvm::Type *Types[] = { RegisterType };
5506
5507 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5508 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5509
5510 return Builder.CreateCall(F, Metadata);
5511 }
5512
5513 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5514 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5515 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5516 if (BuiltinID == clang::AArch64::BI__sys) {
5517 // Return 0 for convenience, even though MSVC returns some other undefined
5518 // value.
5519 Result = ConstantInt::get(Builder.getInt32Ty(), 0);
5520 }
5521 return Result;
5522 }
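// The constant argument packs op0/op1/CRn/CRm/op2, which are unpacked above
// into an "op0:op1:CRn:CRm:op2" register string. For instance, an encoding
// of 0x5E82 (op0=3, op1=3, CRn=13, CRm=0, op2=2, i.e. TPIDR_EL0) yields the
// metadata string "3:3:13:0:2" passed to llvm.read_register or
// llvm.write_register.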
5523
5524 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5525 llvm::Function *F =
5526 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5527 return Builder.CreateCall(F);
5528 }
5529
5530 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5531 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5532 return Builder.CreateCall(F);
5533 }
5534
5535 if (BuiltinID == clang::AArch64::BI__mulh ||
5536 BuiltinID == clang::AArch64::BI__umulh) {
5537 llvm::Type *ResType = ConvertType(E->getType());
5538 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5539
5540 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5541 Value *LHS =
5542 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5543 Value *RHS =
5544 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5545
5546 Value *MulResult, *HigherBits;
5547 if (IsSigned) {
5548 MulResult = Builder.CreateNSWMul(LHS, RHS);
5549 HigherBits = Builder.CreateAShr(MulResult, 64);
5550 } else {
5551 MulResult = Builder.CreateNUWMul(LHS, RHS);
5552 HigherBits = Builder.CreateLShr(MulResult, 64);
5553 }
5554 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5555
5556 return HigherBits;
5557 }
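// In other words, __mulh and __umulh return the high 64 bits of the full
// 128-bit product: both operands are widened to i128, multiplied, shifted
// right by 64 (arithmetically for the signed form), and truncated back to
// the 64-bit result type.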
5558
5559 if (BuiltinID == AArch64::BI__writex18byte ||
5560 BuiltinID == AArch64::BI__writex18word ||
5561 BuiltinID == AArch64::BI__writex18dword ||
5562 BuiltinID == AArch64::BI__writex18qword) {
5563 // Process the args first
5564 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5565 Value *DataArg = EmitScalarExpr(E->getArg(1));
5566
5567 // Read x18 as i8*
5568 llvm::Value *X18 = readX18AsPtr(*this);
5569
5570 // Store val at x18 + offset
5571 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5572 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5573 StoreInst *Store =
5574 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5575 return Store;
5576 }
5577
5578 if (BuiltinID == AArch64::BI__readx18byte ||
5579 BuiltinID == AArch64::BI__readx18word ||
5580 BuiltinID == AArch64::BI__readx18dword ||
5581 BuiltinID == AArch64::BI__readx18qword) {
5582 // Process the args first
5583 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5584
5585 // Read x18 as i8*
5586 llvm::Value *X18 = readX18AsPtr(*this);
5587
5588 // Load x18 + offset
5589 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5590 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5591 llvm::Type *IntTy = ConvertType(E->getType());
5592 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5593 return Load;
5594 }
5595
5596 if (BuiltinID == AArch64::BI__addx18byte ||
5597 BuiltinID == AArch64::BI__addx18word ||
5598 BuiltinID == AArch64::BI__addx18dword ||
5599 BuiltinID == AArch64::BI__addx18qword ||
5600 BuiltinID == AArch64::BI__incx18byte ||
5601 BuiltinID == AArch64::BI__incx18word ||
5602 BuiltinID == AArch64::BI__incx18dword ||
5603 BuiltinID == AArch64::BI__incx18qword) {
5604 llvm::Type *IntTy;
5605 bool isIncrement;
5606 switch (BuiltinID) {
5607 case AArch64::BI__incx18byte:
5608 IntTy = Int8Ty;
5609 isIncrement = true;
5610 break;
5611 case AArch64::BI__incx18word:
5612 IntTy = Int16Ty;
5613 isIncrement = true;
5614 break;
5615 case AArch64::BI__incx18dword:
5616 IntTy = Int32Ty;
5617 isIncrement = true;
5618 break;
5619 case AArch64::BI__incx18qword:
5620 IntTy = Int64Ty;
5621 isIncrement = true;
5622 break;
5623 default:
5624 IntTy = ConvertType(E->getArg(1)->getType());
5625 isIncrement = false;
5626 break;
5627 }
5628 // Process the args first
5629 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5630 Value *ValToAdd =
5631 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5632
5633 // Read x18 as i8*
5634 llvm::Value *X18 = readX18AsPtr(*this);
5635
5636 // Load x18 + offset
5637 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5638 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5639 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5640
5641 // Add values
5642 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5643
5644 // Store val at x18 + offset
5645 StoreInst *Store =
5646 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5647 return Store;
5648 }
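// All of the x18 intrinsics above share one pattern: read x18 as a pointer
// (readX18AsPtr), form an i8 GEP at the byte offset, and do a 1-byte-aligned
// load and/or store of the access width; the __incx18* forms are simply the
// __addx18* forms with a constant 1.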
5649
5650 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5651 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5652 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5653 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5654 Value *Arg = EmitScalarExpr(E->getArg(0));
5655 llvm::Type *RetTy = ConvertType(E->getType());
5656 return Builder.CreateBitCast(Arg, RetTy);
5657 }
5658
5659 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5660 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5661 BuiltinID == AArch64::BI_CountLeadingZeros ||
5662 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5663 Value *Arg = EmitScalarExpr(E->getArg(0));
5664 llvm::Type *ArgType = Arg->getType();
5665
5666 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5667 BuiltinID == AArch64::BI_CountLeadingOnes64)
5668 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5669
5670 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5671 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5672
5673 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5674 BuiltinID == AArch64::BI_CountLeadingZeros64)
5675 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5676 return Result;
5677 }
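// For example, _CountLeadingOnes(x) is emitted as ctlz(~x): the argument is
// XOR'd with all-ones and counted with llvm.ctlz (is_zero_poison = false),
// so _CountLeadingZeros(0) yields 32. The 64-bit variants truncate the
// result to i32 to match the MSVC return type.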
5678
5679 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5680 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5681 Value *Arg = EmitScalarExpr(E->getArg(0));
5682
5683 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5684 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5685 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5686
5687 Value *Result = Builder.CreateCall(F, Arg, "cls");
5688 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5689 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5690 return Result;
5691 }
5692
5693 if (BuiltinID == AArch64::BI_CountOneBits ||
5694 BuiltinID == AArch64::BI_CountOneBits64) {
5695 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5696 llvm::Type *ArgType = ArgValue->getType();
5697 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5698
5699 Value *Result = Builder.CreateCall(F, ArgValue);
5700 if (BuiltinID == AArch64::BI_CountOneBits64)
5701 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5702 return Result;
5703 }
5704
5705 if (BuiltinID == AArch64::BI__prefetch) {
5706 Value *Address = EmitScalarExpr(E->getArg(0));
5707 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5708 Value *Locality = ConstantInt::get(Int32Ty, 3);
5709 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5710 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5711 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5712 }
5713
5714 if (BuiltinID == AArch64::BI__hlt) {
5715 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5716 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5717
5718 // Return 0 for convenience, even though MSVC returns some other undefined
5719 // value.
5720 return ConstantInt::get(Builder.getInt32Ty(), 0);
5721 }
5722
5723 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5724 return Builder.CreateFPTrunc(
5725 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5726 Builder.getFloatTy()),
5727 Builder.getBFloatTy());
5728
5729 // Handle MSVC intrinsics before argument evaluation to prevent double
5730 // evaluation.
5731 if (std::optional<MSVCIntrin> MsvcIntId =
5732 translateAarch64ToMsvcIntrin(BuiltinID))
5733 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5734
5735 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
5736 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5737 return P.first == BuiltinID;
5738 });
5739 if (It != end(NEONEquivalentIntrinsicMap))
5740 BuiltinID = It->second;
5741
5742 // Find out if any arguments are required to be integer constant
5743 // expressions.
5744 unsigned ICEArguments = 0;
5745 ASTContext::GetBuiltinTypeError Error;
5746 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5747 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5748
5749 llvm::SmallVector<Value*, 4> Ops;
5750 Address PtrOp0 = Address::invalid();
5751 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5752 if (i == 0) {
5753 switch (BuiltinID) {
5754 case NEON::BI__builtin_neon_vld1_v:
5755 case NEON::BI__builtin_neon_vld1q_v:
5756 case NEON::BI__builtin_neon_vld1_dup_v:
5757 case NEON::BI__builtin_neon_vld1q_dup_v:
5758 case NEON::BI__builtin_neon_vld1_lane_v:
5759 case NEON::BI__builtin_neon_vld1q_lane_v:
5760 case NEON::BI__builtin_neon_vst1_v:
5761 case NEON::BI__builtin_neon_vst1q_v:
5762 case NEON::BI__builtin_neon_vst1_lane_v:
5763 case NEON::BI__builtin_neon_vst1q_lane_v:
5764 case NEON::BI__builtin_neon_vldap1_lane_s64:
5765 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5766 case NEON::BI__builtin_neon_vstl1_lane_s64:
5767 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5768 // Get the alignment for the argument in addition to the value;
5769 // we'll use it later.
5770 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5771 Ops.push_back(PtrOp0.emitRawPointer(*this));
5772 continue;
5773 }
5774 }
5775 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5776 }
5777
5778 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5779 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5780 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5781
5782 if (Builtin) {
5783 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5784 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5785 assert(Result && "SISD intrinsic should have been handled");
5786 return Result;
5787 }
5788
5789 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5790 NeonTypeFlags Type(0);
5791 if (std::optional<llvm::APSInt> Result =
5792 Arg->getIntegerConstantExpr(getContext()))
5793 // Determine the type of this overloaded NEON intrinsic.
5794 Type = NeonTypeFlags(Result->getZExtValue());
5795
5796 bool usgn = Type.isUnsigned();
5797 bool quad = Type.isQuad();
5798
5799 // Handle non-overloaded intrinsics first.
5800 switch (BuiltinID) {
5801 default: break;
5802 case NEON::BI__builtin_neon_vabsh_f16:
5803 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5804 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5805 case NEON::BI__builtin_neon_vaddq_p128: {
5806 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5807 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5808 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5809 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5810 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5811 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5812 return Builder.CreateBitCast(Ops[0], Int128Ty);
5813 }
5814 case NEON::BI__builtin_neon_vldrq_p128: {
5815 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5816 Value *Ptr = EmitScalarExpr(E->getArg(0));
5817 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5818 CharUnits::fromQuantity(16));
5819 }
5820 case NEON::BI__builtin_neon_vstrq_p128: {
5821 Value *Ptr = Ops[0];
5822 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5823 }
5824 case NEON::BI__builtin_neon_vcvts_f32_u32:
5825 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5826 usgn = true;
5827 [[fallthrough]];
5828 case NEON::BI__builtin_neon_vcvts_f32_s32:
5829 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5830 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5831 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5832 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5833 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5834 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5835 if (usgn)
5836 return Builder.CreateUIToFP(Ops[0], FTy);
5837 return Builder.CreateSIToFP(Ops[0], FTy);
5838 }
5839 case NEON::BI__builtin_neon_vcvth_f16_u16:
5840 case NEON::BI__builtin_neon_vcvth_f16_u32:
5841 case NEON::BI__builtin_neon_vcvth_f16_u64:
5842 usgn = true;
5843 [[fallthrough]];
5844 case NEON::BI__builtin_neon_vcvth_f16_s16:
5845 case NEON::BI__builtin_neon_vcvth_f16_s32:
5846 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5847 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5848 llvm::Type *FTy = HalfTy;
5849 llvm::Type *InTy;
5850 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5851 InTy = Int64Ty;
5852 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5853 InTy = Int32Ty;
5854 else
5855 InTy = Int16Ty;
5856 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5857 if (usgn)
5858 return Builder.CreateUIToFP(Ops[0], FTy);
5859 return Builder.CreateSIToFP(Ops[0], FTy);
5860 }
5861 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5862 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5863 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5864 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5865 case NEON::BI__builtin_neon_vcvth_u16_f16:
5866 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5867 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5868 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5869 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5870 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5871 unsigned Int;
5872 llvm::Type *InTy = Int16Ty;
5873 llvm::Type* FTy = HalfTy;
5874 llvm::Type *Tys[2] = {InTy, FTy};
5875 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5876 switch (BuiltinID) {
5877 default: llvm_unreachable("missing builtin ID in switch!");
5878 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5879 Int = Intrinsic::aarch64_neon_fcvtau; break;
5880 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5881 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5882 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5883 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5884 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5885 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5886 case NEON::BI__builtin_neon_vcvth_u16_f16:
5887 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5888 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5889 Int = Intrinsic::aarch64_neon_fcvtas; break;
5890 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5891 Int = Intrinsic::aarch64_neon_fcvtms; break;
5892 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5893 Int = Intrinsic::aarch64_neon_fcvtns; break;
5894 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5895 Int = Intrinsic::aarch64_neon_fcvtps; break;
5896 case NEON::BI__builtin_neon_vcvth_s16_f16:
5897 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5898 }
5899 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5900 }
5901 case NEON::BI__builtin_neon_vcaleh_f16:
5902 case NEON::BI__builtin_neon_vcalth_f16:
5903 case NEON::BI__builtin_neon_vcageh_f16:
5904 case NEON::BI__builtin_neon_vcagth_f16: {
5905 unsigned Int;
5906 llvm::Type* InTy = Int32Ty;
5907 llvm::Type* FTy = HalfTy;
5908 llvm::Type *Tys[2] = {InTy, FTy};
5909 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5910 switch (BuiltinID) {
5911 default: llvm_unreachable("missing builtin ID in switch!");
5912 case NEON::BI__builtin_neon_vcageh_f16:
5913 Int = Intrinsic::aarch64_neon_facge; break;
5914 case NEON::BI__builtin_neon_vcagth_f16:
5915 Int = Intrinsic::aarch64_neon_facgt; break;
5916 case NEON::BI__builtin_neon_vcaleh_f16:
5917 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5918 case NEON::BI__builtin_neon_vcalth_f16:
5919 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5920 }
5921 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5922 return Builder.CreateTrunc(Ops[0], Int16Ty);
5923 }
5924 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5925 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5926 unsigned Int;
5927 llvm::Type* InTy = Int32Ty;
5928 llvm::Type* FTy = HalfTy;
5929 llvm::Type *Tys[2] = {InTy, FTy};
5930 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5931 switch (BuiltinID) {
5932 default: llvm_unreachable("missing builtin ID in switch!");
5933 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5934 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5935 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5936 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5937 }
5938 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5939 return Builder.CreateTrunc(Ops[0], Int16Ty);
5940 }
5941 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5942 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5943 unsigned Int;
5944 llvm::Type* FTy = HalfTy;
5945 llvm::Type* InTy = Int32Ty;
5946 llvm::Type *Tys[2] = {FTy, InTy};
5947 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5948 switch (BuiltinID) {
5949 default: llvm_unreachable("missing builtin ID in switch!");
5950 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5951 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5952 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5953 break;
5954 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5955 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5956 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5957 break;
5958 }
5959 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5960 }
5961 case NEON::BI__builtin_neon_vpaddd_s64: {
5962 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5963 Value *Vec = EmitScalarExpr(E->getArg(0));
5964 // The vector is v2i64, so make sure it's bitcast to that.
5965 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5966 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5967 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5968 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5969 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5970 // Pairwise addition of a v2i64 into a scalar i64.
5971 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5972 }
5973 case NEON::BI__builtin_neon_vpaddd_f64: {
5974 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5975 Value *Vec = EmitScalarExpr(E->getArg(0));
5976 // The vector is v2f64, so make sure it's bitcast to that.
5977 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5978 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5979 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5980 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5981 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5982 // Pairwise addition of a v2f64 into a scalar f64.
5983 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5984 }
5985 case NEON::BI__builtin_neon_vpadds_f32: {
5986 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5987 Value *Vec = EmitScalarExpr(E->getArg(0));
5988 // The vector is v2f32, so make sure it's bitcast to that.
5989 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5990 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5991 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5992 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5993 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5994 // Pairwise addition of a v2f32 into a scalar f32.
5995 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5996 }
5997 case NEON::BI__builtin_neon_vceqzd_s64:
5998 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5999 return EmitAArch64CompareBuiltinExpr(
6000 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6001 ICmpInst::ICMP_EQ, "vceqz");
6002 case NEON::BI__builtin_neon_vceqzd_f64:
6003 case NEON::BI__builtin_neon_vceqzs_f32:
6004 case NEON::BI__builtin_neon_vceqzh_f16:
6005 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6006 return EmitAArch64CompareBuiltinExpr(
6007 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6008 ICmpInst::FCMP_OEQ, "vceqz");
6009 case NEON::BI__builtin_neon_vcgezd_s64:
6010 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6011 return EmitAArch64CompareBuiltinExpr(
6012 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6013 ICmpInst::ICMP_SGE, "vcgez");
6014 case NEON::BI__builtin_neon_vcgezd_f64:
6015 case NEON::BI__builtin_neon_vcgezs_f32:
6016 case NEON::BI__builtin_neon_vcgezh_f16:
6017 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6018 return EmitAArch64CompareBuiltinExpr(
6019 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6020 ICmpInst::FCMP_OGE, "vcgez");
6021 case NEON::BI__builtin_neon_vclezd_s64:
6022 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6023 return EmitAArch64CompareBuiltinExpr(
6024 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6025 ICmpInst::ICMP_SLE, "vclez");
6026 case NEON::BI__builtin_neon_vclezd_f64:
6027 case NEON::BI__builtin_neon_vclezs_f32:
6028 case NEON::BI__builtin_neon_vclezh_f16:
6029 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6030 return EmitAArch64CompareBuiltinExpr(
6031 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6032 ICmpInst::FCMP_OLE, "vclez");
6033 case NEON::BI__builtin_neon_vcgtzd_s64:
6034 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6035 return EmitAArch64CompareBuiltinExpr(
6036 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6037 ICmpInst::ICMP_SGT, "vcgtz");
6038 case NEON::BI__builtin_neon_vcgtzd_f64:
6039 case NEON::BI__builtin_neon_vcgtzs_f32:
6040 case NEON::BI__builtin_neon_vcgtzh_f16:
6041 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6042 return EmitAArch64CompareBuiltinExpr(
6043 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6044 ICmpInst::FCMP_OGT, "vcgtz");
6045 case NEON::BI__builtin_neon_vcltzd_s64:
6046 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6047 return EmitAArch64CompareBuiltinExpr(
6048 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6049 ICmpInst::ICMP_SLT, "vcltz");
6050
6051 case NEON::BI__builtin_neon_vcltzd_f64:
6052 case NEON::BI__builtin_neon_vcltzs_f32:
6053 case NEON::BI__builtin_neon_vcltzh_f16:
6054 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6055 return EmitAArch64CompareBuiltinExpr(
6056 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6057 ICmpInst::FCMP_OLT, "vcltz");
6058
6059 case NEON::BI__builtin_neon_vceqzd_u64: {
6060 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6061 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6062 Ops[0] =
6063 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6064 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6065 }
6066 case NEON::BI__builtin_neon_vceqd_f64:
6067 case NEON::BI__builtin_neon_vcled_f64:
6068 case NEON::BI__builtin_neon_vcltd_f64:
6069 case NEON::BI__builtin_neon_vcged_f64:
6070 case NEON::BI__builtin_neon_vcgtd_f64: {
6071 llvm::CmpInst::Predicate P;
6072 switch (BuiltinID) {
6073 default: llvm_unreachable("missing builtin ID in switch!");
6074 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6075 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6076 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6077 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6078 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6079 }
6080 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6081 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6082 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6083 if (P == llvm::FCmpInst::FCMP_OEQ)
6084 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6085 else
6086 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6087 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6088 }
6089 case NEON::BI__builtin_neon_vceqs_f32:
6090 case NEON::BI__builtin_neon_vcles_f32:
6091 case NEON::BI__builtin_neon_vclts_f32:
6092 case NEON::BI__builtin_neon_vcges_f32:
6093 case NEON::BI__builtin_neon_vcgts_f32: {
6094 llvm::CmpInst::Predicate P;
6095 switch (BuiltinID) {
6096 default: llvm_unreachable("missing builtin ID in switch!");
6097 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6098 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6099 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6100 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6101 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6102 }
6103 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6104 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6105 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6106 if (P == llvm::FCmpInst::FCMP_OEQ)
6107 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6108 else
6109 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6110 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6111 }
6112 case NEON::BI__builtin_neon_vceqh_f16:
6113 case NEON::BI__builtin_neon_vcleh_f16:
6114 case NEON::BI__builtin_neon_vclth_f16:
6115 case NEON::BI__builtin_neon_vcgeh_f16:
6116 case NEON::BI__builtin_neon_vcgth_f16: {
6117 llvm::CmpInst::Predicate P;
6118 switch (BuiltinID) {
6119 default: llvm_unreachable("missing builtin ID in switch!");
6120 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6121 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6122 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6123 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6124 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6125 }
6126 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6127 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6128 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6129 if (P == llvm::FCmpInst::FCMP_OEQ)
6130 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6131 else
6132 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6133 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6134 }
6135 case NEON::BI__builtin_neon_vceqd_s64:
6136 case NEON::BI__builtin_neon_vceqd_u64:
6137 case NEON::BI__builtin_neon_vcgtd_s64:
6138 case NEON::BI__builtin_neon_vcgtd_u64:
6139 case NEON::BI__builtin_neon_vcltd_s64:
6140 case NEON::BI__builtin_neon_vcltd_u64:
6141 case NEON::BI__builtin_neon_vcged_u64:
6142 case NEON::BI__builtin_neon_vcged_s64:
6143 case NEON::BI__builtin_neon_vcled_u64:
6144 case NEON::BI__builtin_neon_vcled_s64: {
6145 llvm::CmpInst::Predicate P;
6146 switch (BuiltinID) {
6147 default: llvm_unreachable("missing builtin ID in switch!");
6148 case NEON::BI__builtin_neon_vceqd_s64:
6149 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6150 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6151 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6152 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6153 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6154 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6155 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6156 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6157 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6158 }
6159 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6160 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6161 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6162 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6163 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6164 }
6165 case NEON::BI__builtin_neon_vtstd_s64:
6166 case NEON::BI__builtin_neon_vtstd_u64: {
6167 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6168 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6169 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6170 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6171 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6172 llvm::Constant::getNullValue(Int64Ty));
6173 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6174 }
6175 case NEON::BI__builtin_neon_vset_lane_i8:
6176 case NEON::BI__builtin_neon_vset_lane_i16:
6177 case NEON::BI__builtin_neon_vset_lane_i32:
6178 case NEON::BI__builtin_neon_vset_lane_i64:
6179 case NEON::BI__builtin_neon_vset_lane_bf16:
6180 case NEON::BI__builtin_neon_vset_lane_f32:
6181 case NEON::BI__builtin_neon_vsetq_lane_i8:
6182 case NEON::BI__builtin_neon_vsetq_lane_i16:
6183 case NEON::BI__builtin_neon_vsetq_lane_i32:
6184 case NEON::BI__builtin_neon_vsetq_lane_i64:
6185 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6186 case NEON::BI__builtin_neon_vsetq_lane_f32:
6187 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6188 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6189 case NEON::BI__builtin_neon_vset_lane_f64:
6190 // The vector type needs a cast for the v1f64 variant.
6191 Ops[1] =
6192 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6193 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6194 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6195 case NEON::BI__builtin_neon_vset_lane_mf8:
6196 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6197 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6198 // The input vector type needs a cast to scalar type.
6199 Ops[0] =
6200 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
6201 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6202 case NEON::BI__builtin_neon_vsetq_lane_f64:
6203 // The vector type needs a cast for the v2f64 variant.
6204 Ops[1] =
6205 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6206 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6207 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6208
6209 case NEON::BI__builtin_neon_vget_lane_i8:
6210 case NEON::BI__builtin_neon_vdupb_lane_i8:
6211 Ops[0] =
6212 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6213 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6214 "vget_lane");
6215 case NEON::BI__builtin_neon_vgetq_lane_i8:
6216 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6217 Ops[0] =
6218 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6219 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6220 "vgetq_lane");
6221 case NEON::BI__builtin_neon_vget_lane_mf8:
6222 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6223 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6224 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6225 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6226 "vget_lane");
6227 case NEON::BI__builtin_neon_vget_lane_i16:
6228 case NEON::BI__builtin_neon_vduph_lane_i16:
6229 Ops[0] =
6230 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6231 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6232 "vget_lane");
6233 case NEON::BI__builtin_neon_vgetq_lane_i16:
6234 case NEON::BI__builtin_neon_vduph_laneq_i16:
6235 Ops[0] =
6236 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6237 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6238 "vgetq_lane");
6239 case NEON::BI__builtin_neon_vget_lane_i32:
6240 case NEON::BI__builtin_neon_vdups_lane_i32:
6241 Ops[0] =
6242 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6243 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6244 "vget_lane");
6245 case NEON::BI__builtin_neon_vdups_lane_f32:
6246 Ops[0] =
6247 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6248 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6249 "vdups_lane");
6250 case NEON::BI__builtin_neon_vgetq_lane_i32:
6251 case NEON::BI__builtin_neon_vdups_laneq_i32:
6252 Ops[0] =
6253 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6254 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6255 "vgetq_lane");
6256 case NEON::BI__builtin_neon_vget_lane_i64:
6257 case NEON::BI__builtin_neon_vdupd_lane_i64:
6258 Ops[0] =
6259 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6260 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6261 "vget_lane");
6262 case NEON::BI__builtin_neon_vdupd_lane_f64:
6263 Ops[0] =
6264 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6265 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6266 "vdupd_lane");
6267 case NEON::BI__builtin_neon_vgetq_lane_i64:
6268 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6269 Ops[0] =
6270 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6271 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6272 "vgetq_lane");
6273 case NEON::BI__builtin_neon_vget_lane_f32:
6274 Ops[0] =
6275 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6276 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6277 "vget_lane");
6278 case NEON::BI__builtin_neon_vget_lane_f64:
6279 Ops[0] =
6280 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6281 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6282 "vget_lane");
6283 case NEON::BI__builtin_neon_vgetq_lane_f32:
6284 case NEON::BI__builtin_neon_vdups_laneq_f32:
6285 Ops[0] =
6286 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6287 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6288 "vgetq_lane");
6289 case NEON::BI__builtin_neon_vgetq_lane_f64:
6290 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6291 Ops[0] =
6292 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6293 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6294 "vgetq_lane");
6295 case NEON::BI__builtin_neon_vaddh_f16:
6296 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6297 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6298 case NEON::BI__builtin_neon_vsubh_f16:
6299 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6300 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6301 case NEON::BI__builtin_neon_vmulh_f16:
6302 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6303 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6304 case NEON::BI__builtin_neon_vdivh_f16:
6305 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6306 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6307 case NEON::BI__builtin_neon_vfmah_f16:
6308 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6309 return emitCallMaybeConstrainedFPBuiltin(
6310 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6311 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6312 case NEON::BI__builtin_neon_vfmsh_f16: {
6313 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
6314
6315 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6316 return emitCallMaybeConstrainedFPBuiltin(
6317 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6318 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
6319 }
6320 case NEON::BI__builtin_neon_vaddd_s64:
6321 case NEON::BI__builtin_neon_vaddd_u64:
6322 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6323 case NEON::BI__builtin_neon_vsubd_s64:
6324 case NEON::BI__builtin_neon_vsubd_u64:
6325 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6326 case NEON::BI__builtin_neon_vqdmlalh_s16:
6327 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6328 SmallVector<Value *, 2> ProductOps;
6329 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6330 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6331 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6332 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6333 ProductOps, "vqdmlXl");
6334 Constant *CI = ConstantInt::get(SizeTy, 0);
6335 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6336
6337 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6338 ? Intrinsic::aarch64_neon_sqadd
6339 : Intrinsic::aarch64_neon_sqsub;
6340 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6341 }
6342 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6343 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6344 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6345 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6346 Ops, "vqshlu_n");
6347 }
6348 case NEON::BI__builtin_neon_vqshld_n_u64:
6349 case NEON::BI__builtin_neon_vqshld_n_s64: {
6350 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6351 ? Intrinsic::aarch64_neon_uqshl
6352 : Intrinsic::aarch64_neon_sqshl;
6353 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6354 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6355 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6356 }
6357 case NEON::BI__builtin_neon_vrshrd_n_u64:
6358 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6359 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6360 ? Intrinsic::aarch64_neon_urshl
6361 : Intrinsic::aarch64_neon_srshl;
6362 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6363 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6364 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6365 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6366 }
6367 case NEON::BI__builtin_neon_vrsrad_n_u64:
6368 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6369 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6370 ? Intrinsic::aarch64_neon_urshl
6371 : Intrinsic::aarch64_neon_srshl;
6372 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6373 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6374 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6375 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6376 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6377 }
6378 case NEON::BI__builtin_neon_vshld_n_s64:
6379 case NEON::BI__builtin_neon_vshld_n_u64: {
6380 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6381 return Builder.CreateShl(
6382 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6383 }
6384 case NEON::BI__builtin_neon_vshrd_n_s64: {
6385 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6386 return Builder.CreateAShr(
6387 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6388 Amt->getZExtValue())),
6389 "shrd_n");
6390 }
6391 case NEON::BI__builtin_neon_vshrd_n_u64: {
6392 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6393 uint64_t ShiftAmt = Amt->getZExtValue();
6394 // Right-shifting an unsigned value by its size yields 0.
6395 if (ShiftAmt == 64)
6396 return ConstantInt::get(Int64Ty, 0);
6397 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6398 "shrd_n");
6399 }
6400 case NEON::BI__builtin_neon_vsrad_n_s64: {
6401 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6402 Ops[1] = Builder.CreateAShr(
6403 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6404 Amt->getZExtValue())),
6405 "shrd_n");
6406 return Builder.CreateAdd(Ops[0], Ops[1]);
6407 }
6408 case NEON::BI__builtin_neon_vsrad_n_u64: {
6409 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6410 uint64_t ShiftAmt = Amt->getZExtValue();
6411 // Right-shifting an unsigned value by its size yields 0.
6412 // As Op + 0 = Op, return Ops[0] directly.
6413 if (ShiftAmt == 64)
6414 return Ops[0];
6415 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6416 "shrd_n");
6417 return Builder.CreateAdd(Ops[0], Ops[1]);
6418 }
6419 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6420 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6421 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6422 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6423 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6424 "lane");
6425 SmallVector<Value *, 2> ProductOps;
6426 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6427 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6428 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6429 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6430 ProductOps, "vqdmlXl");
6431 Constant *CI = ConstantInt::get(SizeTy, 0);
6432 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6433 Ops.pop_back();
6434
6435 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6436 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6437 ? Intrinsic::aarch64_neon_sqadd
6438 : Intrinsic::aarch64_neon_sqsub;
6439 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6440 }
6441 case NEON::BI__builtin_neon_vqdmlals_s32:
6442 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6443 SmallVector<Value *, 2> ProductOps;
6444 ProductOps.push_back(Ops[1]);
6445 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6446 Ops[1] =
6447 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6448 ProductOps, "vqdmlXl");
6449
6450 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6451 ? Intrinsic::aarch64_neon_sqadd
6452 : Intrinsic::aarch64_neon_sqsub;
6453 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6454 }
6455 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6456 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6457 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6458 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6459 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6460 "lane");
6461 SmallVector<Value *, 2> ProductOps;
6462 ProductOps.push_back(Ops[1]);
6463 ProductOps.push_back(Ops[2]);
6464 Ops[1] =
6465 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6466 ProductOps, "vqdmlXl");
6467 Ops.pop_back();
6468
6469 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6470 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6471 ? Intrinsic::aarch64_neon_sqadd
6472 : Intrinsic::aarch64_neon_sqsub;
6473 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6474 }
6475 case NEON::BI__builtin_neon_vget_lane_bf16:
6476 case NEON::BI__builtin_neon_vduph_lane_bf16:
6477 case NEON::BI__builtin_neon_vduph_lane_f16: {
6478 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6479 "vget_lane");
6480 }
6481 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6482 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6483 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6484 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6485 "vgetq_lane");
6486 }
6487 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6488 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6489 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6490 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6491 }
6492 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6493 SmallVector<int, 16> ConcatMask(8);
6494 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6495 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6496 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6497 llvm::Value *Trunc =
6498 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6499 return Builder.CreateShuffleVector(
6500 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6501 }
6502 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6503 SmallVector<int, 16> ConcatMask(8);
6504 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6505 SmallVector<int, 16> LoMask(4);
6506 std::iota(LoMask.begin(), LoMask.end(), 0);
6507 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6508 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6509 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6510 llvm::Value *Inactive = Builder.CreateShuffleVector(
6511 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6512 llvm::Value *Trunc =
6513 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6514 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6515 }
6516
6517 case clang::AArch64::BI_InterlockedAdd:
6518 case clang::AArch64::BI_InterlockedAdd_acq:
6519 case clang::AArch64::BI_InterlockedAdd_rel:
6520 case clang::AArch64::BI_InterlockedAdd_nf:
6521 case clang::AArch64::BI_InterlockedAdd64:
6522 case clang::AArch64::BI_InterlockedAdd64_acq:
6523 case clang::AArch64::BI_InterlockedAdd64_rel:
6524 case clang::AArch64::BI_InterlockedAdd64_nf: {
6525 Address DestAddr = CheckAtomicAlignment(*this, E);
6526 Value *Val = EmitScalarExpr(E->getArg(1));
6527 llvm::AtomicOrdering Ordering;
6528 switch (BuiltinID) {
6529 case clang::AArch64::BI_InterlockedAdd:
6530 case clang::AArch64::BI_InterlockedAdd64:
6531 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6532 break;
6533 case clang::AArch64::BI_InterlockedAdd_acq:
6534 case clang::AArch64::BI_InterlockedAdd64_acq:
6535 Ordering = llvm::AtomicOrdering::Acquire;
6536 break;
6537 case clang::AArch64::BI_InterlockedAdd_rel:
6538 case clang::AArch64::BI_InterlockedAdd64_rel:
6539 Ordering = llvm::AtomicOrdering::Release;
6540 break;
6541 case clang::AArch64::BI_InterlockedAdd_nf:
6542 case clang::AArch64::BI_InterlockedAdd64_nf:
6543 Ordering = llvm::AtomicOrdering::Monotonic;
6544 break;
6545 default:
6546 llvm_unreachable("missing builtin ID in switch!");
6547 }
6548 AtomicRMWInst *RMWI =
6549 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6550 return Builder.CreateAdd(RMWI, Val);
6551 }
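// Note that atomicrmw add returns the value held in memory before the
// addition, whereas _InterlockedAdd returns the resulting value, hence the
// extra add of Val above.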
6552 }
6553
6554 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6555 llvm::Type *Ty = VTy;
6556 if (!Ty)
6557 return nullptr;
6558
6559 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6560 // defer to common code if it's been added to our special map.
6561 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6562 AArch64SIMDIntrinsicsProvenSorted);
6563
6564 if (Builtin)
6565 return EmitCommonNeonBuiltinExpr(
6566 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6567 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6568 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6569
6570 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6571 return V;
6572
6573 unsigned Int;
6574 bool ExtractLow = false;
6575 bool ExtendLaneArg = false;
6576 switch (BuiltinID) {
6577 default: return nullptr;
6578 case NEON::BI__builtin_neon_vbsl_v:
6579 case NEON::BI__builtin_neon_vbslq_v: {
6580 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6581 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6582 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6583 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6584
6585 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6586 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6587 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6588 return Builder.CreateBitCast(Ops[0], Ty);
6589 }
6590 case NEON::BI__builtin_neon_vfma_lane_v:
6591 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6592 // The ARM builtins (and instructions) have the addend as the first
6593 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6594 Value *Addend = Ops[0];
6595 Value *Multiplicand = Ops[1];
6596 Value *LaneSource = Ops[2];
6597 Ops[0] = Multiplicand;
6598 Ops[1] = LaneSource;
6599 Ops[2] = Addend;
6600
6601 // Now adjust things to handle the lane access.
6602 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6603 ? llvm::FixedVectorType::get(VTy->getElementType(),
6604 VTy->getNumElements() / 2)
6605 : VTy;
6606 llvm::Constant *cst = cast<Constant>(Ops[3]);
6607 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6608 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6609 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6610
6611 Ops.pop_back();
6612 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6613 : Intrinsic::fma;
6614 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6615 }
6616 case NEON::BI__builtin_neon_vfma_laneq_v: {
6617 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6618 // v1f64 fma should be mapped to Neon scalar f64 fma
6619 if (VTy && VTy->getElementType() == DoubleTy) {
6620 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6621 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6622 llvm::FixedVectorType *VTy =
6623 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6624 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6625 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6626 Value *Result;
6627 Result = emitCallMaybeConstrainedFPBuiltin(
6628 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6629 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6630 return Builder.CreateBitCast(Result, Ty);
6631 }
6632 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6633 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6634
6635 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6636 VTy->getNumElements() * 2);
6637 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6638 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6639 cast<ConstantInt>(Ops[3]));
6640 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6641
6642 return emitCallMaybeConstrainedFPBuiltin(
6643 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6644 {Ops[2], Ops[1], Ops[0]});
6645 }
6646 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6647 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6648 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6649
6650 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6651 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6652 return emitCallMaybeConstrainedFPBuiltin(
6653 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6654 {Ops[2], Ops[1], Ops[0]});
6655 }
6656 case NEON::BI__builtin_neon_vfmah_lane_f16:
6657 case NEON::BI__builtin_neon_vfmas_lane_f32:
6658 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6659 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6660 case NEON::BI__builtin_neon_vfmad_lane_f64:
6661 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6662 Ops.push_back(EmitScalarExpr(E->getArg(3)));
6663 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6664 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6665 return emitCallMaybeConstrainedFPBuiltin(
6666 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6667 {Ops[1], Ops[2], Ops[0]});
6668 }
6669 case NEON::BI__builtin_neon_vmull_v:
6670 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6671 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6672 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6673 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6674 case NEON::BI__builtin_neon_vmax_v:
6675 case NEON::BI__builtin_neon_vmaxq_v:
6676 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6677 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6678 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6679 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6680 case NEON::BI__builtin_neon_vmaxh_f16: {
6681 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6682 Int = Intrinsic::aarch64_neon_fmax;
6683 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6684 }
6685 case NEON::BI__builtin_neon_vmin_v:
6686 case NEON::BI__builtin_neon_vminq_v:
6687 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6688 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6689 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6690 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6691 case NEON::BI__builtin_neon_vminh_f16: {
6692 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6693 Int = Intrinsic::aarch64_neon_fmin;
6694 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6695 }
6696 case NEON::BI__builtin_neon_vabd_v:
6697 case NEON::BI__builtin_neon_vabdq_v:
6698 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6699 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6700 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6701 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6702 case NEON::BI__builtin_neon_vpadal_v:
6703 case NEON::BI__builtin_neon_vpadalq_v: {
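// vpadal (pairwise add and accumulate long) is emitted as the plain
// pairwise-long-add intrinsic (uaddlp/saddlp) followed by an ordinary
// vector add of the accumulator operand.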
6704 unsigned ArgElts = VTy->getNumElements();
6705 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6706 unsigned BitWidth = EltTy->getBitWidth();
6707 auto *ArgTy = llvm::FixedVectorType::get(
6708 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6709 llvm::Type* Tys[2] = { VTy, ArgTy };
6710 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6711 SmallVector<llvm::Value*, 1> TmpOps;
6712 TmpOps.push_back(Ops[1]);
6713 Function *F = CGM.getIntrinsic(Int, Tys);
6714 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6715 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6716 return Builder.CreateAdd(tmp, addend);
6717 }
6718 case NEON::BI__builtin_neon_vpmin_v:
6719 case NEON::BI__builtin_neon_vpminq_v:
6720 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6721 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6722 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6723 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6724 case NEON::BI__builtin_neon_vpmax_v:
6725 case NEON::BI__builtin_neon_vpmaxq_v:
6726 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6727 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6728 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6729 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6730 case NEON::BI__builtin_neon_vminnm_v:
6731 case NEON::BI__builtin_neon_vminnmq_v:
6732 Int = Intrinsic::aarch64_neon_fminnm;
6733 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6734 case NEON::BI__builtin_neon_vminnmh_f16:
6735 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6736 Int = Intrinsic::aarch64_neon_fminnm;
6737 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6738 case NEON::BI__builtin_neon_vmaxnm_v:
6739 case NEON::BI__builtin_neon_vmaxnmq_v:
6740 Int = Intrinsic::aarch64_neon_fmaxnm;
6741 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6742 case NEON::BI__builtin_neon_vmaxnmh_f16:
6743 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6744 Int = Intrinsic::aarch64_neon_fmaxnm;
6745 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6746 case NEON::BI__builtin_neon_vrecpss_f32: {
6747 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6748 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6749 Ops, "vrecps");
6750 }
6751 case NEON::BI__builtin_neon_vrecpsd_f64:
6752 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6753 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6754 Ops, "vrecps");
6755 case NEON::BI__builtin_neon_vrecpsh_f16:
6756 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6757 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6758 Ops, "vrecps");
6759 case NEON::BI__builtin_neon_vqshrun_n_v:
6760 Int = Intrinsic::aarch64_neon_sqshrun;
6761 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6762 case NEON::BI__builtin_neon_vqrshrun_n_v:
6763 Int = Intrinsic::aarch64_neon_sqrshrun;
6764 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6765 case NEON::BI__builtin_neon_vqshrn_n_v:
6766 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6767 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6768 case NEON::BI__builtin_neon_vrshrn_n_v:
6769 Int = Intrinsic::aarch64_neon_rshrn;
6770 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6771 case NEON::BI__builtin_neon_vqrshrn_n_v:
6772 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6773 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6774 case NEON::BI__builtin_neon_vrndah_f16: {
6775 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6776 Int = Builder.getIsFPConstrained()
6777 ? Intrinsic::experimental_constrained_round
6778 : Intrinsic::round;
6779 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6780 }
6781 case NEON::BI__builtin_neon_vrnda_v:
6782 case NEON::BI__builtin_neon_vrndaq_v: {
6783 Int = Builder.getIsFPConstrained()
6784 ? Intrinsic::experimental_constrained_round
6785 : Intrinsic::round;
6786 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6787 }
6788 case NEON::BI__builtin_neon_vrndih_f16: {
6789 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6790 Int = Builder.getIsFPConstrained()
6791 ? Intrinsic::experimental_constrained_nearbyint
6792 : Intrinsic::nearbyint;
6793 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6794 }
6795 case NEON::BI__builtin_neon_vrndmh_f16: {
6796 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6797 Int = Builder.getIsFPConstrained()
6798 ? Intrinsic::experimental_constrained_floor
6799 : Intrinsic::floor;
6800 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6801 }
6802 case NEON::BI__builtin_neon_vrndm_v:
6803 case NEON::BI__builtin_neon_vrndmq_v: {
6804 Int = Builder.getIsFPConstrained()
6805 ? Intrinsic::experimental_constrained_floor
6806 : Intrinsic::floor;
6807 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6808 }
6809 case NEON::BI__builtin_neon_vrndnh_f16: {
6810 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6811 Int = Builder.getIsFPConstrained()
6812 ? Intrinsic::experimental_constrained_roundeven
6813 : Intrinsic::roundeven;
6814 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6815 }
6816 case NEON::BI__builtin_neon_vrndn_v:
6817 case NEON::BI__builtin_neon_vrndnq_v: {
6818 Int = Builder.getIsFPConstrained()
6819 ? Intrinsic::experimental_constrained_roundeven
6820 : Intrinsic::roundeven;
6821 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6822 }
6823 case NEON::BI__builtin_neon_vrndns_f32: {
6824 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6825 Int = Builder.getIsFPConstrained()
6826 ? Intrinsic::experimental_constrained_roundeven
6827 : Intrinsic::roundeven;
6828 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6829 }
6830 case NEON::BI__builtin_neon_vrndph_f16: {
6831 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6832 Int = Builder.getIsFPConstrained()
6833 ? Intrinsic::experimental_constrained_ceil
6834 : Intrinsic::ceil;
6835 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6836 }
6837 case NEON::BI__builtin_neon_vrndp_v:
6838 case NEON::BI__builtin_neon_vrndpq_v: {
6839 Int = Builder.getIsFPConstrained()
6840 ? Intrinsic::experimental_constrained_ceil
6841 : Intrinsic::ceil;
6842 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6843 }
6844 case NEON::BI__builtin_neon_vrndxh_f16: {
6845 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6846 Int = Builder.getIsFPConstrained()
6847 ? Intrinsic::experimental_constrained_rint
6848 : Intrinsic::rint;
6849 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6850 }
6851 case NEON::BI__builtin_neon_vrndx_v:
6852 case NEON::BI__builtin_neon_vrndxq_v: {
6853 Int = Builder.getIsFPConstrained()
6854 ? Intrinsic::experimental_constrained_rint
6855 : Intrinsic::rint;
6856 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6857 }
6858 case NEON::BI__builtin_neon_vrndh_f16: {
6859 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6860 Int = Builder.getIsFPConstrained()
6861 ? Intrinsic::experimental_constrained_trunc
6862 : Intrinsic::trunc;
6863 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6864 }
6865 case NEON::BI__builtin_neon_vrnd32x_f32:
6866 case NEON::BI__builtin_neon_vrnd32xq_f32:
6867 case NEON::BI__builtin_neon_vrnd32x_f64:
6868 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6869 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6870 Int = Intrinsic::aarch64_neon_frint32x;
6871 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6872 }
6873 case NEON::BI__builtin_neon_vrnd32z_f32:
6874 case NEON::BI__builtin_neon_vrnd32zq_f32:
6875 case NEON::BI__builtin_neon_vrnd32z_f64:
6876 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6877 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6878 Int = Intrinsic::aarch64_neon_frint32z;
6879 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6880 }
6881 case NEON::BI__builtin_neon_vrnd64x_f32:
6882 case NEON::BI__builtin_neon_vrnd64xq_f32:
6883 case NEON::BI__builtin_neon_vrnd64x_f64:
6884 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6885 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6886 Int = Intrinsic::aarch64_neon_frint64x;
6887 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6888 }
6889 case NEON::BI__builtin_neon_vrnd64z_f32:
6890 case NEON::BI__builtin_neon_vrnd64zq_f32:
6891 case NEON::BI__builtin_neon_vrnd64z_f64:
6892 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6893 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6894 Int = Intrinsic::aarch64_neon_frint64z;
6895 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6896 }
6897 case NEON::BI__builtin_neon_vrnd_v:
6898 case NEON::BI__builtin_neon_vrndq_v: {
6899 Int = Builder.getIsFPConstrained()
6900 ? Intrinsic::experimental_constrained_trunc
6901 : Intrinsic::trunc;
6902 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6903 }
6904 case NEON::BI__builtin_neon_vcvt_f64_v:
6905 case NEON::BI__builtin_neon_vcvtq_f64_v:
6906 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6907 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6908 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6909 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6910 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6911 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6912 "unexpected vcvt_f64_f32 builtin");
6913 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6914 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6915
6916 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6917 }
6918 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6919 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6920 "unexpected vcvt_f32_f64 builtin");
6921 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6922 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6923
6924 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6925 }
6926 case NEON::BI__builtin_neon_vcvt_s32_v:
6927 case NEON::BI__builtin_neon_vcvt_u32_v:
6928 case NEON::BI__builtin_neon_vcvt_s64_v:
6929 case NEON::BI__builtin_neon_vcvt_u64_v:
6930 case NEON::BI__builtin_neon_vcvt_s16_f16:
6931 case NEON::BI__builtin_neon_vcvt_u16_f16:
6932 case NEON::BI__builtin_neon_vcvtq_s32_v:
6933 case NEON::BI__builtin_neon_vcvtq_u32_v:
6934 case NEON::BI__builtin_neon_vcvtq_s64_v:
6935 case NEON::BI__builtin_neon_vcvtq_u64_v:
6936 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6937 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6938 Int =
6939 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6940 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
6941 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
6942 }
6943 case NEON::BI__builtin_neon_vcvta_s16_f16:
6944 case NEON::BI__builtin_neon_vcvta_u16_f16:
6945 case NEON::BI__builtin_neon_vcvta_s32_v:
6946 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6947 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6948 case NEON::BI__builtin_neon_vcvta_u32_v:
6949 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6950 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6951 case NEON::BI__builtin_neon_vcvta_s64_v:
6952 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6953 case NEON::BI__builtin_neon_vcvta_u64_v:
6954 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6955 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6956 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6957 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6958 }
6959 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6960 case NEON::BI__builtin_neon_vcvtm_s32_v:
6961 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6962 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6963 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6964 case NEON::BI__builtin_neon_vcvtm_u32_v:
6965 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6966 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6967 case NEON::BI__builtin_neon_vcvtm_s64_v:
6968 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6969 case NEON::BI__builtin_neon_vcvtm_u64_v:
6970 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6971 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6972 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6973 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6974 }
6975 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6976 case NEON::BI__builtin_neon_vcvtn_s32_v:
6977 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6978 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6979 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6980 case NEON::BI__builtin_neon_vcvtn_u32_v:
6981 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6982 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6983 case NEON::BI__builtin_neon_vcvtn_s64_v:
6984 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6985 case NEON::BI__builtin_neon_vcvtn_u64_v:
6986 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6987 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6988 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6989 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6990 }
6991 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6992 case NEON::BI__builtin_neon_vcvtp_s32_v:
6993 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6994 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6995 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6996 case NEON::BI__builtin_neon_vcvtp_u32_v:
6997 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6998 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6999 case NEON::BI__builtin_neon_vcvtp_s64_v:
7000 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7001 case NEON::BI__builtin_neon_vcvtp_u64_v:
7002 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7003 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7004 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7005 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
7006 }
7007 case NEON::BI__builtin_neon_vmulx_v:
7008 case NEON::BI__builtin_neon_vmulxq_v: {
7009 Int = Intrinsic::aarch64_neon_fmulx;
7010 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
7011 }
7012 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7013 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7014 // vmulx_lane should be mapped to Neon scalar mulx after
7015 // extracting the scalar element
7016 Ops.push_back(EmitScalarExpr(E->getArg(2)));
7017 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7018 Ops.pop_back();
7019 Int = Intrinsic::aarch64_neon_fmulx;
7020 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
7021 }
7022 case NEON::BI__builtin_neon_vmul_lane_v:
7023 case NEON::BI__builtin_neon_vmul_laneq_v: {
7024 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7025 bool Quad = false;
7026 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7027 Quad = true;
7028 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7029 llvm::FixedVectorType *VTy =
7030 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7031 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7032 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7033 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7034 return Builder.CreateBitCast(Result, Ty);
7035 }
7036 case NEON::BI__builtin_neon_vnegd_s64:
7037 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
7038 case NEON::BI__builtin_neon_vnegh_f16:
7039 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
7040 case NEON::BI__builtin_neon_vpmaxnm_v:
7041 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7042 Int = Intrinsic::aarch64_neon_fmaxnmp;
7043 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7044 }
7045 case NEON::BI__builtin_neon_vpminnm_v:
7046 case NEON::BI__builtin_neon_vpminnmq_v: {
7047 Int = Intrinsic::aarch64_neon_fminnmp;
7048 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7049 }
7050 case NEON::BI__builtin_neon_vsqrth_f16: {
7051 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7052 Int = Builder.getIsFPConstrained()
7053 ? Intrinsic::experimental_constrained_sqrt
7054 : Intrinsic::sqrt;
7055 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7056 }
7057 case NEON::BI__builtin_neon_vsqrt_v:
7058 case NEON::BI__builtin_neon_vsqrtq_v: {
7059 Int = Builder.getIsFPConstrained()
7060 ? Intrinsic::experimental_constrained_sqrt
7061 : Intrinsic::sqrt;
7062 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7063 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7064 }
7065 case NEON::BI__builtin_neon_vrbit_v:
7066 case NEON::BI__builtin_neon_vrbitq_v: {
7067 Int = Intrinsic::bitreverse;
7068 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7069 }
7070 case NEON::BI__builtin_neon_vaddv_u8:
7071 // FIXME: These are handled by the AArch64 scalar code.
7072 usgn = true;
7073 [[fallthrough]];
7074 case NEON::BI__builtin_neon_vaddv_s8: {
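// Across-vector reductions: the aarch64.neon.[us]addv intrinsics return an
// i32, which is truncated back down to the element width the builtin expects.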
7075 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7076 Ty = Int32Ty;
7077 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7078 llvm::Type *Tys[2] = { Ty, VTy };
7079 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7080 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7081 return Builder.CreateTrunc(Ops[0], Int8Ty);
7082 }
7083 case NEON::BI__builtin_neon_vaddv_u16:
7084 usgn = true;
7085 [[fallthrough]];
7086 case NEON::BI__builtin_neon_vaddv_s16: {
7087 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7088 Ty = Int32Ty;
7089 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7090 llvm::Type *Tys[2] = { Ty, VTy };
7091 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7092 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7093 return Builder.CreateTrunc(Ops[0], Int16Ty);
7094 }
7095 case NEON::BI__builtin_neon_vaddvq_u8:
7096 usgn = true;
7097 [[fallthrough]];
7098 case NEON::BI__builtin_neon_vaddvq_s8: {
7099 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7100 Ty = Int32Ty;
7101 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7102 llvm::Type *Tys[2] = { Ty, VTy };
7103 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7104 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7105 return Builder.CreateTrunc(Ops[0], Int8Ty);
7106 }
7107 case NEON::BI__builtin_neon_vaddvq_u16:
7108 usgn = true;
7109 [[fallthrough]];
7110 case NEON::BI__builtin_neon_vaddvq_s16: {
7111 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7112 Ty = Int32Ty;
7113 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7114 llvm::Type *Tys[2] = { Ty, VTy };
7115 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7116 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7117 return Builder.CreateTrunc(Ops[0], Int16Ty);
7118 }
7119 case NEON::BI__builtin_neon_vmaxv_u8: {
7120 Int = Intrinsic::aarch64_neon_umaxv;
7121 Ty = Int32Ty;
7122 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7123 llvm::Type *Tys[2] = { Ty, VTy };
7124 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7125 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7126 return Builder.CreateTrunc(Ops[0], Int8Ty);
7127 }
7128 case NEON::BI__builtin_neon_vmaxv_u16: {
7129 Int = Intrinsic::aarch64_neon_umaxv;
7130 Ty = Int32Ty;
7131 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7132 llvm::Type *Tys[2] = { Ty, VTy };
7133 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7134 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7135 return Builder.CreateTrunc(Ops[0], Int16Ty);
7136 }
7137 case NEON::BI__builtin_neon_vmaxvq_u8: {
7138 Int = Intrinsic::aarch64_neon_umaxv;
7139 Ty = Int32Ty;
7140 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7141 llvm::Type *Tys[2] = { Ty, VTy };
7142 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7143 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7144 return Builder.CreateTrunc(Ops[0], Int8Ty);
7145 }
7146 case NEON::BI__builtin_neon_vmaxvq_u16: {
7147 Int = Intrinsic::aarch64_neon_umaxv;
7148 Ty = Int32Ty;
7149 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7150 llvm::Type *Tys[2] = { Ty, VTy };
7151 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7152 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7153 return Builder.CreateTrunc(Ops[0], Int16Ty);
7154 }
7155 case NEON::BI__builtin_neon_vmaxv_s8: {
7156 Int = Intrinsic::aarch64_neon_smaxv;
7157 Ty = Int32Ty;
7158 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7159 llvm::Type *Tys[2] = { Ty, VTy };
7160 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7161 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7162 return Builder.CreateTrunc(Ops[0], Int8Ty);
7163 }
7164 case NEON::BI__builtin_neon_vmaxv_s16: {
7165 Int = Intrinsic::aarch64_neon_smaxv;
7166 Ty = Int32Ty;
7167 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7168 llvm::Type *Tys[2] = { Ty, VTy };
7169 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7170 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7171 return Builder.CreateTrunc(Ops[0], Int16Ty);
7172 }
7173 case NEON::BI__builtin_neon_vmaxvq_s8: {
7174 Int = Intrinsic::aarch64_neon_smaxv;
7175 Ty = Int32Ty;
7176 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7177 llvm::Type *Tys[2] = { Ty, VTy };
7178 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7179 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7180 return Builder.CreateTrunc(Ops[0], Int8Ty);
7181 }
7182 case NEON::BI__builtin_neon_vmaxvq_s16: {
7183 Int = Intrinsic::aarch64_neon_smaxv;
7184 Ty = Int32Ty;
7185 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7186 llvm::Type *Tys[2] = { Ty, VTy };
7187 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7188 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7189 return Builder.CreateTrunc(Ops[0], Int16Ty);
7190 }
7191 case NEON::BI__builtin_neon_vmaxv_f16: {
7192 Int = Intrinsic::aarch64_neon_fmaxv;
7193 Ty = HalfTy;
7194 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7195 llvm::Type *Tys[2] = { Ty, VTy };
7196 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7197 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7198 return Builder.CreateTrunc(Ops[0], HalfTy);
7199 }
7200 case NEON::BI__builtin_neon_vmaxvq_f16: {
7201 Int = Intrinsic::aarch64_neon_fmaxv;
7202 Ty = HalfTy;
7203 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7204 llvm::Type *Tys[2] = { Ty, VTy };
7205 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7206 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7207 return Builder.CreateTrunc(Ops[0], HalfTy);
7208 }
7209 case NEON::BI__builtin_neon_vminv_u8: {
7210 Int = Intrinsic::aarch64_neon_uminv;
7211 Ty = Int32Ty;
7212 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7213 llvm::Type *Tys[2] = { Ty, VTy };
7214 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7215 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7216 return Builder.CreateTrunc(Ops[0], Int8Ty);
7217 }
7218 case NEON::BI__builtin_neon_vminv_u16: {
7219 Int = Intrinsic::aarch64_neon_uminv;
7220 Ty = Int32Ty;
7221 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7222 llvm::Type *Tys[2] = { Ty, VTy };
7223 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7224 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7225 return Builder.CreateTrunc(Ops[0], Int16Ty);
7226 }
7227 case NEON::BI__builtin_neon_vminvq_u8: {
7228 Int = Intrinsic::aarch64_neon_uminv;
7229 Ty = Int32Ty;
7230 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7231 llvm::Type *Tys[2] = { Ty, VTy };
7232 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7233 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7234 return Builder.CreateTrunc(Ops[0], Int8Ty);
7235 }
7236 case NEON::BI__builtin_neon_vminvq_u16: {
7237 Int = Intrinsic::aarch64_neon_uminv;
7238 Ty = Int32Ty;
7239 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7240 llvm::Type *Tys[2] = { Ty, VTy };
7241 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7242 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7243 return Builder.CreateTrunc(Ops[0], Int16Ty);
7244 }
7245 case NEON::BI__builtin_neon_vminv_s8: {
7246 Int = Intrinsic::aarch64_neon_sminv;
7247 Ty = Int32Ty;
7248 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7249 llvm::Type *Tys[2] = { Ty, VTy };
7250 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7251 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7252 return Builder.CreateTrunc(Ops[0], Int8Ty);
7253 }
7254 case NEON::BI__builtin_neon_vminv_s16: {
7255 Int = Intrinsic::aarch64_neon_sminv;
7256 Ty = Int32Ty;
7257 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7258 llvm::Type *Tys[2] = { Ty, VTy };
7259 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7260 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7261 return Builder.CreateTrunc(Ops[0], Int16Ty);
7262 }
7263 case NEON::BI__builtin_neon_vminvq_s8: {
7264 Int = Intrinsic::aarch64_neon_sminv;
7265 Ty = Int32Ty;
7266 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7267 llvm::Type *Tys[2] = { Ty, VTy };
7268 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7269 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7270 return Builder.CreateTrunc(Ops[0], Int8Ty);
7271 }
7272 case NEON::BI__builtin_neon_vminvq_s16: {
7273 Int = Intrinsic::aarch64_neon_sminv;
7274 Ty = Int32Ty;
7275 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7276 llvm::Type *Tys[2] = { Ty, VTy };
7277 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7278 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7279 return Builder.CreateTrunc(Ops[0], Int16Ty);
7280 }
7281 case NEON::BI__builtin_neon_vminv_f16: {
7282 Int = Intrinsic::aarch64_neon_fminv;
7283 Ty = HalfTy;
7284 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7285 llvm::Type *Tys[2] = { Ty, VTy };
7286 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7287 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7288 return Builder.CreateTrunc(Ops[0], HalfTy);
7289 }
7290 case NEON::BI__builtin_neon_vminvq_f16: {
7291 Int = Intrinsic::aarch64_neon_fminv;
7292 Ty = HalfTy;
7293 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7294 llvm::Type *Tys[2] = { Ty, VTy };
7295 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7296 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7297 return Builder.CreateTrunc(Ops[0], HalfTy);
7298 }
7299 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7300 Int = Intrinsic::aarch64_neon_fmaxnmv;
7301 Ty = HalfTy;
7302 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7303 llvm::Type *Tys[2] = { Ty, VTy };
7304 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7305 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7306 return Builder.CreateTrunc(Ops[0], HalfTy);
7307 }
7308 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7309 Int = Intrinsic::aarch64_neon_fmaxnmv;
7310 Ty = HalfTy;
7311 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7312 llvm::Type *Tys[2] = { Ty, VTy };
7313 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7314 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7315 return Builder.CreateTrunc(Ops[0], HalfTy);
7316 }
7317 case NEON::BI__builtin_neon_vminnmv_f16: {
7318 Int = Intrinsic::aarch64_neon_fminnmv;
7319 Ty = HalfTy;
7320 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7321 llvm::Type *Tys[2] = { Ty, VTy };
7322 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7323 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7324 return Builder.CreateTrunc(Ops[0], HalfTy);
7325 }
7326 case NEON::BI__builtin_neon_vminnmvq_f16: {
7327 Int = Intrinsic::aarch64_neon_fminnmv;
7328 Ty = HalfTy;
7329 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7330 llvm::Type *Tys[2] = { Ty, VTy };
7331 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7332 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7333 return Builder.CreateTrunc(Ops[0], HalfTy);
7334 }
7335 case NEON::BI__builtin_neon_vmul_n_f64: {
7336 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7337 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7338 return Builder.CreateFMul(Ops[0], RHS);
7339 }
7340 case NEON::BI__builtin_neon_vaddlv_u8: {
7341 Int = Intrinsic::aarch64_neon_uaddlv;
7342 Ty = Int32Ty;
7343 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7344 llvm::Type *Tys[2] = { Ty, VTy };
7345 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7346 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7347 return Builder.CreateTrunc(Ops[0], Int16Ty);
7348 }
7349 case NEON::BI__builtin_neon_vaddlv_u16: {
7350 Int = Intrinsic::aarch64_neon_uaddlv;
7351 Ty = Int32Ty;
7352 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7353 llvm::Type *Tys[2] = { Ty, VTy };
7354 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7355 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7356 }
7357 case NEON::BI__builtin_neon_vaddlvq_u8: {
7358 Int = Intrinsic::aarch64_neon_uaddlv;
7359 Ty = Int32Ty;
7360 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7361 llvm::Type *Tys[2] = { Ty, VTy };
7362 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7363 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7364 return Builder.CreateTrunc(Ops[0], Int16Ty);
7365 }
7366 case NEON::BI__builtin_neon_vaddlvq_u16: {
7367 Int = Intrinsic::aarch64_neon_uaddlv;
7368 Ty = Int32Ty;
7369 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7370 llvm::Type *Tys[2] = { Ty, VTy };
7371 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7372 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7373 }
7374 case NEON::BI__builtin_neon_vaddlv_s8: {
7375 Int = Intrinsic::aarch64_neon_saddlv;
7376 Ty = Int32Ty;
7377 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7378 llvm::Type *Tys[2] = { Ty, VTy };
7379 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7380 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7381 return Builder.CreateTrunc(Ops[0], Int16Ty);
7382 }
7383 case NEON::BI__builtin_neon_vaddlv_s16: {
7384 Int = Intrinsic::aarch64_neon_saddlv;
7385 Ty = Int32Ty;
7386 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7387 llvm::Type *Tys[2] = { Ty, VTy };
7388 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7389 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7390 }
7391 case NEON::BI__builtin_neon_vaddlvq_s8: {
7392 Int = Intrinsic::aarch64_neon_saddlv;
7393 Ty = Int32Ty;
7394 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7395 llvm::Type *Tys[2] = { Ty, VTy };
7396 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7397 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7398 return Builder.CreateTrunc(Ops[0], Int16Ty);
7399 }
7400 case NEON::BI__builtin_neon_vaddlvq_s16: {
7401 Int = Intrinsic::aarch64_neon_saddlv;
7402 Ty = Int32Ty;
7403 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7404 llvm::Type *Tys[2] = { Ty, VTy };
7405 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7406 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7407 }
7408 case NEON::BI__builtin_neon_vsri_n_v:
7409 case NEON::BI__builtin_neon_vsriq_n_v: {
7410 Int = Intrinsic::aarch64_neon_vsri;
7411 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7412 return EmitNeonCall(Intrin, Ops, "vsri_n");
7413 }
7414 case NEON::BI__builtin_neon_vsli_n_v:
7415 case NEON::BI__builtin_neon_vsliq_n_v: {
7416 Int = Intrinsic::aarch64_neon_vsli;
7417 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7418 return EmitNeonCall(Intrin, Ops, "vsli_n");
7419 }
7420 case NEON::BI__builtin_neon_vsra_n_v:
7421 case NEON::BI__builtin_neon_vsraq_n_v:
7422 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7423 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7424 return Builder.CreateAdd(Ops[0], Ops[1]);
7425 case NEON::BI__builtin_neon_vrsra_n_v:
7426 case NEON::BI__builtin_neon_vrsraq_n_v: {
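// vrsra_n is split into a rounding right shift (the [us]rshl intrinsic with
// a negated shift amount, handled inside EmitNeonCall) plus an add of the
// accumulator vector.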
7427 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7428 SmallVector<llvm::Value*, 2> TmpOps;
7429 TmpOps.push_back(Ops[1]);
7430 TmpOps.push_back(Ops[2]);
7431 Function* F = CGM.getIntrinsic(Int, Ty);
7432 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7433 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7434 return Builder.CreateAdd(Ops[0], tmp);
7435 }
7436 case NEON::BI__builtin_neon_vld1_v:
7437 case NEON::BI__builtin_neon_vld1q_v: {
7438 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
7439 }
7440 case NEON::BI__builtin_neon_vst1_v:
7441 case NEON::BI__builtin_neon_vst1q_v:
7442 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7443 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7444 case NEON::BI__builtin_neon_vld1_lane_v:
7445 case NEON::BI__builtin_neon_vld1q_lane_v: {
7446 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7447 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7448 PtrOp0.getAlignment());
7449 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7450 }
7451 case NEON::BI__builtin_neon_vldap1_lane_s64:
7452 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7453 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7454 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7455 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
7456 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7457 Ops[0] = LI;
7458 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7459 }
7460 case NEON::BI__builtin_neon_vld1_dup_v:
7461 case NEON::BI__builtin_neon_vld1q_dup_v: {
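// vld1_dup: load a single element, insert it at lane 0 of a poison vector,
// then splat lane 0 across the whole vector with EmitNeonSplat.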
7462 Value *V = PoisonValue::get(Ty);
7463 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7464 PtrOp0.getAlignment());
7465 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7466 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7467 return EmitNeonSplat(Ops[0], CI);
7468 }
7469 case NEON::BI__builtin_neon_vst1_lane_v:
7470 case NEON::BI__builtin_neon_vst1q_lane_v:
7471 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7472 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7473 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7474 case NEON::BI__builtin_neon_vstl1_lane_s64:
7475 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7476 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7477 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7478 llvm::StoreInst *SI =
7479 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7480 SI->setAtomic(llvm::AtomicOrdering::Release);
7481 return SI;
7482 }
7483 case NEON::BI__builtin_neon_vld2_v:
7484 case NEON::BI__builtin_neon_vld2q_v: {
7485 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7486 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7487 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7488 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7489 }
7490 case NEON::BI__builtin_neon_vld3_v:
7491 case NEON::BI__builtin_neon_vld3q_v: {
7492 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7493 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7494 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7495 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7496 }
7497 case NEON::BI__builtin_neon_vld4_v:
7498 case NEON::BI__builtin_neon_vld4q_v: {
7499 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7500 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7501 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7502 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7503 }
7504 case NEON::BI__builtin_neon_vld2_dup_v:
7505 case NEON::BI__builtin_neon_vld2q_dup_v: {
7506 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7507 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7508 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7509 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7510 }
7511 case NEON::BI__builtin_neon_vld3_dup_v:
7512 case NEON::BI__builtin_neon_vld3q_dup_v: {
7513 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7514 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7515 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7516 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7517 }
7518 case NEON::BI__builtin_neon_vld4_dup_v:
7519 case NEON::BI__builtin_neon_vld4q_dup_v: {
7520 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7521 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7522 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7523 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7524 }
7525 case NEON::BI__builtin_neon_vld2_lane_v:
7526 case NEON::BI__builtin_neon_vld2q_lane_v: {
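// Ops[0] is the address of the returned aggregate; the remaining operands
// are rotated into the (vectors..., lane, pointer) order expected by the
// ld2lane intrinsic, which is then called on ArrayRef(Ops).slice(1).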
7527 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7528 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7529 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7530 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7531 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7532 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7533 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
7534 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7535 }
7536 case NEON::BI__builtin_neon_vld3_lane_v:
7537 case NEON::BI__builtin_neon_vld3q_lane_v: {
7538 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7539 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7540 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7541 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7542 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7543 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7544 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7545 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
7546 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7547 }
7548 case NEON::BI__builtin_neon_vld4_lane_v:
7549 case NEON::BI__builtin_neon_vld4q_lane_v: {
7550 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7551 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7552 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7553 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7554 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7555 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7556 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7557 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7558 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
7559 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7560 }
7561 case NEON::BI__builtin_neon_vst2_v:
7562 case NEON::BI__builtin_neon_vst2q_v: {
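// The builtin passes the pointer first, but the st2 intrinsic takes it
// last, so the operand list is rotated before emitting the call.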
7563 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7564 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7565 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7566 Ops, "");
7567 }
7568 case NEON::BI__builtin_neon_vst2_lane_v:
7569 case NEON::BI__builtin_neon_vst2q_lane_v: {
7570 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7571 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7572 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7573 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7574 Ops, "");
7575 }
7576 case NEON::BI__builtin_neon_vst3_v:
7577 case NEON::BI__builtin_neon_vst3q_v: {
7578 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7579 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7580 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7581 Ops, "");
7582 }
7583 case NEON::BI__builtin_neon_vst3_lane_v:
7584 case NEON::BI__builtin_neon_vst3q_lane_v: {
7585 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7586 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7587 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7588 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7589 Ops, "");
7590 }
7591 case NEON::BI__builtin_neon_vst4_v:
7592 case NEON::BI__builtin_neon_vst4q_v: {
7593 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7594 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7595 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7596 Ops, "");
7597 }
7598 case NEON::BI__builtin_neon_vst4_lane_v:
7599 case NEON::BI__builtin_neon_vst4q_lane_v: {
7600 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7601 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7602 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7603 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7604 Ops, "");
7605 }
7606 case NEON::BI__builtin_neon_vtrn_v:
7607 case NEON::BI__builtin_neon_vtrnq_v: {
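// vtrn returns two vectors through the pointer in Ops[0]: each loop
// iteration builds one transposed half with a shufflevector and stores it
// at the next vector-sized offset from that pointer.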
7608 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7609 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7610 Value *SV = nullptr;
7611
7612 for (unsigned vi = 0; vi != 2; ++vi) {
7613 SmallVector<int, 16> Indices;
7614 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7615 Indices.push_back(i+vi);
7616 Indices.push_back(i+e+vi);
7617 }
7618 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7619 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7620 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7621 }
7622 return SV;
7623 }
7624 case NEON::BI__builtin_neon_vuzp_v:
7625 case NEON::BI__builtin_neon_vuzpq_v: {
7626 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7627 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7628 Value *SV = nullptr;
7629
7630 for (unsigned vi = 0; vi != 2; ++vi) {
7631 SmallVector<int, 16> Indices;
7632 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7633 Indices.push_back(2*i+vi);
7634
7635 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7636 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7637 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7638 }
7639 return SV;
7640 }
7641 case NEON::BI__builtin_neon_vzip_v:
7642 case NEON::BI__builtin_neon_vzipq_v: {
7643 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7644 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7645 Value *SV = nullptr;
7646
7647 for (unsigned vi = 0; vi != 2; ++vi) {
7648 SmallVector<int, 16> Indices;
7649 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7650 Indices.push_back((i + vi*e) >> 1);
7651 Indices.push_back(((i + vi*e) >> 1)+e);
7652 }
7653 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7654 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7655 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7656 }
7657 return SV;
7658 }
7659 case NEON::BI__builtin_neon_vqtbl1q_v: {
7660 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7661 Ops, "vtbl1");
7662 }
7663 case NEON::BI__builtin_neon_vqtbl2q_v: {
7664 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7665 Ops, "vtbl2");
7666 }
7667 case NEON::BI__builtin_neon_vqtbl3q_v: {
7668 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7669 Ops, "vtbl3");
7670 }
7671 case NEON::BI__builtin_neon_vqtbl4q_v: {
7672 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7673 Ops, "vtbl4");
7674 }
7675 case NEON::BI__builtin_neon_vqtbx1q_v: {
7676 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7677 Ops, "vtbx1");
7678 }
7679 case NEON::BI__builtin_neon_vqtbx2q_v: {
7680 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7681 Ops, "vtbx2");
7682 }
7683 case NEON::BI__builtin_neon_vqtbx3q_v: {
7684 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7685 Ops, "vtbx3");
7686 }
7687 case NEON::BI__builtin_neon_vqtbx4q_v: {
7688 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7689 Ops, "vtbx4");
7690 }
7691 case NEON::BI__builtin_neon_vsqadd_v:
7692 case NEON::BI__builtin_neon_vsqaddq_v: {
7693 Int = Intrinsic::aarch64_neon_usqadd;
7694 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7695 }
7696 case NEON::BI__builtin_neon_vuqadd_v:
7697 case NEON::BI__builtin_neon_vuqaddq_v: {
7698 Int = Intrinsic::aarch64_neon_suqadd;
7699 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7700 }
7701
7702 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7703 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7704 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7705 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7706 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7707 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7708 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7709 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7710 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7711 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7712 llvm::Type *Tys[2];
7713 Tys[0] = Ty;
7714 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7715 /*isQuad*/ false));
7716 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7717 }
7718 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7719 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7720 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7721 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7722 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7723 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7724 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7725 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7726 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7727 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7728 llvm::Type *Tys[2];
7729 Tys[0] = Ty;
7730 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7731 /*isQuad*/ true));
7732 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7733 }
7734 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7735 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7736 case NEON::BI__builtin_neon_vluti2_lane_f16:
7737 case NEON::BI__builtin_neon_vluti2_lane_p16:
7738 case NEON::BI__builtin_neon_vluti2_lane_p8:
7739 case NEON::BI__builtin_neon_vluti2_lane_s16:
7740 case NEON::BI__builtin_neon_vluti2_lane_s8:
7741 case NEON::BI__builtin_neon_vluti2_lane_u16:
7742 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7743 Int = Intrinsic::aarch64_neon_vluti2_lane;
7744 llvm::Type *Tys[2];
7745 Tys[0] = Ty;
7746 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7747 /*isQuad*/ false));
7748 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7749 }
7750 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7751 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7752 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7753 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7754 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7755 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7756 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7757 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7758 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7759 Int = Intrinsic::aarch64_neon_vluti2_lane;
7760 llvm::Type *Tys[2];
7761 Tys[0] = Ty;
7762 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7763 /*isQuad*/ true));
7764 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7765 }
7766 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7767 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7768 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7769 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7770 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7771 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7772 }
7773 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7774 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7775 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7776 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7777 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7778 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7779 }
7780 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7781 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7782 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7783 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7784 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7785 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7786 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7787 }
7788 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7789 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7790 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7791 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7792 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7793 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7794 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7795 }
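// The "_low" FP8 conversion variants set ExtractLow, which tells
// EmitFP8NeonCvtCall to convert only the lower half of the source vector.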
7796 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7797 ExtractLow = true;
7798 LLVM_FALLTHROUGH;
7799 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7800 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7801 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7802 llvm::FixedVectorType::get(BFloatTy, 8),
7803 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7804 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7805 ExtractLow = true;
7806 LLVM_FALLTHROUGH;
7807 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7808 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7809 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7810 llvm::FixedVectorType::get(BFloatTy, 8),
7811 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7812 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7813 ExtractLow = true;
7814 LLVM_FALLTHROUGH;
7815 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7816 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7817 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7818 llvm::FixedVectorType::get(HalfTy, 8),
7819 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7820 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7821 ExtractLow = true;
7822 LLVM_FALLTHROUGH;
7823 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7824 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7825 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7826 llvm::FixedVectorType::get(HalfTy, 8),
7827 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7828 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7829 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7830 llvm::FixedVectorType::get(Int8Ty, 8),
7831 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7832 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7833 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7834 llvm::FixedVectorType::get(Int8Ty, 8),
7835 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7836 E, "vfcvtn");
7837 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7838 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7839 llvm::FixedVectorType::get(Int8Ty, 16),
7840 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7841 E, "vfcvtn");
7842 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
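// The existing low-half result in Ops[0] is widened into a 16 x i8 vector
// (upper half poison) so the fcvtn2 intrinsic can fill in the upper half.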
7843 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7844 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7845 uint64_t(0));
7846 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7847 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7848 }
7849
7850 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7851 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7852 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7853 Ops, E, "fdot2");
7854 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7855 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7856 ExtendLaneArg = true;
7857 LLVM_FALLTHROUGH;
7858 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7859 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7860 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7861 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7862 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7863 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7864 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7865 FloatTy, Ops, E, "fdot4");
7866 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7867 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7868 ExtendLaneArg = true;
7869 LLVM_FALLTHROUGH;
7870 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7871 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7872 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7873 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7874
7875 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7876 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7877 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7878 "vmlal");
7879 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7880 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7881 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7882 "vmlal");
7883 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7884 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7885 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7886 "vmlall");
7887 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7888 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7889 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7890 "vmlall");
7891 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7892 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7893 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7894 "vmlall");
7895 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7896 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7897 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7898 "vmlall");
7899 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7900 ExtendLaneArg = true;
7901 LLVM_FALLTHROUGH;
7902 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7903 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7904 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7905 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7906 ExtendLaneArg = true;
7907 LLVM_FALLTHROUGH;
7908 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7909 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7910 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7911 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7912 ExtendLaneArg = true;
7913 LLVM_FALLTHROUGH;
7914 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7915 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7916 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7917 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7918 ExtendLaneArg = true;
7919 LLVM_FALLTHROUGH;
7920 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7921 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7922 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7923 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7924 ExtendLaneArg = true;
7925 LLVM_FALLTHROUGH;
7926 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7927 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7928 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7929 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7930 ExtendLaneArg = true;
7931 LLVM_FALLTHROUGH;
7932 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7933 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7934 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7935 case NEON::BI__builtin_neon_vamin_f16:
7936 case NEON::BI__builtin_neon_vaminq_f16:
7937 case NEON::BI__builtin_neon_vamin_f32:
7938 case NEON::BI__builtin_neon_vaminq_f32:
7939 case NEON::BI__builtin_neon_vaminq_f64: {
7940 Int = Intrinsic::aarch64_neon_famin;
7941 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7942 }
7943 case NEON::BI__builtin_neon_vamax_f16:
7944 case NEON::BI__builtin_neon_vamaxq_f16:
7945 case NEON::BI__builtin_neon_vamax_f32:
7946 case NEON::BI__builtin_neon_vamaxq_f32:
7947 case NEON::BI__builtin_neon_vamaxq_f64: {
7948 Int = Intrinsic::aarch64_neon_famax;
7949 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7950 }
7951 case NEON::BI__builtin_neon_vscale_f16:
7952 case NEON::BI__builtin_neon_vscaleq_f16:
7953 case NEON::BI__builtin_neon_vscale_f32:
7954 case NEON::BI__builtin_neon_vscaleq_f32:
7955 case NEON::BI__builtin_neon_vscaleq_f64: {
7956 Int = Intrinsic::aarch64_neon_fp8_fscale;
7957 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7958 }
7959 }
7960}
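The famin/famax/fscale cases above all reduce to the same pattern: request the AArch64 NEON intrinsic overloaded on the operand vector type and call it with the gathered operands. A minimal sketch of that pattern against the raw LLVM API is shown below; it is illustrative only (the helper name emitFAMin and the surrounding Module/IRBuilder setup are assumptions, not part of ARM.cpp).

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Emit a call to llvm.aarch64.neon.famin overloaded on the operand type,
// e.g. <4 x float>; the declaration is created in the module on first use.
static Value *emitFAMin(Module &M, IRBuilder<> &Builder, Value *LHS,
                        Value *RHS) {
  Type *Ty = LHS->getType();
  Function *F = Intrinsic::getOrInsertDeclaration(
      &M, Intrinsic::aarch64_neon_famin, {Ty});
  return Builder.CreateCall(F, {LHS, RHS}, "famin");
}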
7961
7962Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7963 const CallExpr *E) {
7964 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7965 BuiltinID == BPF::BI__builtin_btf_type_id ||
7966 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7967 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7968 "unexpected BPF builtin");
7969
7970 // A sequence number, injected into IR builtin functions, to
7971 // prevent CSE when the only difference between the functions
7972 // may be just the debuginfo metadata.
7973 static uint32_t BuiltinSeqNum;
7974
7975 switch (BuiltinID) {
7976 default:
7977 llvm_unreachable("Unexpected BPF builtin");
7978 case BPF::BI__builtin_preserve_field_info: {
7979 const Expr *Arg = E->getArg(0);
7980 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7981
7982 if (!getDebugInfo()) {
7983 CGM.Error(E->getExprLoc(),
7984 "using __builtin_preserve_field_info() without -g");
7985 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7986 : EmitLValue(Arg).emitRawPointer(*this);
7987 }
7988
7989 // Enable underlying preserve_*_access_index() generation.
7990 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7991 IsInPreservedAIRegion = true;
7992 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7993 : EmitLValue(Arg).emitRawPointer(*this);
7994 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7995
7996 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7997 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7998
7999 // Build the IR for the preserve_field_info intrinsic.
8000 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
8001 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
8002 {FieldAddr->getType()});
8003 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
8004 }
8005 case BPF::BI__builtin_btf_type_id:
8006 case BPF::BI__builtin_preserve_type_info: {
8007 if (!getDebugInfo()) {
8008 CGM.Error(E->getExprLoc(), "using builtin function without -g");
8009 return nullptr;
8010 }
8011
8012 const Expr *Arg0 = E->getArg(0);
8013 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
8014 Arg0->getType(), Arg0->getExprLoc());
8015
8016 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
8017 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
8018 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
8019
8020 llvm::Function *FnDecl;
8021 if (BuiltinID == BPF::BI__builtin_btf_type_id)
8022 FnDecl = Intrinsic::getOrInsertDeclaration(
8023 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
8024 else
8025 FnDecl = Intrinsic::getOrInsertDeclaration(
8026 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
8027 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
8028 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
8029 return Fn;
8030 }
8031 case BPF::BI__builtin_preserve_enum_value: {
8032 if (!getDebugInfo()) {
8033 CGM.Error(E->getExprLoc(), "using builtin function without -g");
8034 return nullptr;
8035 }
8036
8037 const Expr *Arg0 = E->getArg(0);
8038 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
8039 Arg0->getType(), Arg0->getExprLoc());
8040
8041 // Find enumerator
8042 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
8043 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
8044 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
8045 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
8046
8047 auto InitVal = Enumerator->getInitVal();
8048 std::string InitValStr;
8049 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
8050 InitValStr = std::to_string(InitVal.getSExtValue());
8051 else
8052 InitValStr = std::to_string(InitVal.getZExtValue());
8053 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
8054 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
8055
8056 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
8057 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
8058 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
8059
8060 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
8061 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
8062 CallInst *Fn =
8063 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
8064 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
8065 return Fn;
8066 }
8067 }
8068}
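For context, the BPF builtins handled above originate from CO-RE (compile once, run everywhere) relocations in BPF C code; they require the BPF target and -g, otherwise the CGM.Error path is taken. A hedged source-level sketch follows; the struct, its fields, and the literal field-info kind 0 (byte offset, per the kernel's BPF_FIELD_BYTE_OFFSET convention) are illustrative assumptions, not taken from ARM.cpp.

// Illustrative only: __builtin_preserve_field_info keeps the field access
// relocatable; the second argument is a constant selecting the info kind.
struct pkt {
  unsigned len;
  unsigned flags : 4; // a bitfield access would take the bitfield-pointer path
};

static unsigned len_byte_offset(struct pkt *p) {
  return __builtin_preserve_field_info(p->len, 0); // 0 == byte offset (assumed)
}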
8069
8071Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
8072 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
8073 "Not a power-of-two sized vector!");
8074 bool AllConstants = true;
8075 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
8076 AllConstants &= isa<Constant>(Ops[i]);
8077
8078 // If this is a constant vector, create a ConstantVector.
8079 if (AllConstants) {
8080 SmallVector<llvm::Constant *, 16> CstOps;
8081 for (llvm::Value *Op : Ops)
8082 CstOps.push_back(cast<Constant>(Op));
8083 return llvm::ConstantVector::get(CstOps);
8084 }
8085
8086 // Otherwise, insertelement the values to build the vector.
8087 Value *Result = llvm::PoisonValue::get(
8088 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
8089
8090 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8091 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
8092
8093 return Result;
8094}
8095
8096Value *CodeGenFunction::EmitAArch64CpuInit() {
8097 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8098 llvm::FunctionCallee Func =
8099 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
8100 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
8101 cast<llvm::GlobalValue>(Func.getCallee())
8102 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
8103 return Builder.CreateCall(Func);
8104}
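EmitAArch64CpuInit only declares and calls the compiler-rt initializer that fills in __aarch64_cpu_features before any feature bits are read. A rough equivalent with the plain LLVM API, shown as a sketch (the helper name emitCpuInitCall and the Module/IRBuilder context are assumed):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Declare void __init_cpu_features_resolver() if needed and emit a call to it.
static CallInst *emitCpuInitCall(Module &M, IRBuilder<> &Builder) {
  FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), /*isVarArg=*/false);
  FunctionCallee Init =
      M.getOrInsertFunction("__init_cpu_features_resolver", FTy);
  if (auto *GV = dyn_cast<GlobalValue>(Init.getCallee()))
    GV->setDSOLocal(true); // mirror the dso_local marking done above
  return Builder.CreateCall(Init);
}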
8105
8106Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8107 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
8108 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
8109 llvm::SmallVector<StringRef, 8> Features;
8110 ArgStr.split(Features, "+");
8111 for (auto &Feature : Features) {
8112 Feature = Feature.trim();
8113 if (!llvm::AArch64::parseFMVExtension(Feature))
8114 return Builder.getFalse();
8115 if (Feature != "default")
8116 Features.push_back(Feature);
8117 }
8118 return EmitAArch64CpuSupports(Features);
8119}
8120
8121llvm::Value *
8122CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8123 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
8124 Value *Result = Builder.getTrue();
8125 if (FeaturesMask != 0) {
8126 // Get features from structure in runtime library
8127 // struct {
8128 // unsigned long long features;
8129 // } __aarch64_cpu_features;
8130 llvm::Type *STy = llvm::StructType::get(Int64Ty);
8131 llvm::Constant *AArch64CPUFeatures =
8132 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
8133 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
8134 llvm::Value *CpuFeatures = Builder.CreateGEP(
8135 STy, AArch64CPUFeatures,
8136 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
8137 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
8138 CharUnits::fromQuantity(8));
8139 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
8140 Value *Bitset = Builder.CreateAnd(Features, Mask);
8141 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
8142 Result = Builder.CreateAnd(Result, Cmp);
8143 }
8144 return Result;
8145}
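At the source level this is the machinery behind __builtin_cpu_supports on AArch64: each requested FMV feature contributes bits to FeaturesMask, and the emitted IR loads __aarch64_cpu_features.features once, masks it, and requires every requested bit to be set. An illustrative use (the feature names are assumed to be valid FMV extension names; the snippet is not taken from ARM.cpp):

// Illustrative only: '+' joins several features, and the test succeeds only
// if the runtime reports all of them.
bool have_sve2_and_bf16() {
  return __builtin_cpu_supports("sve2+bf16");
}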