Lines Matching +full:ete +full:- +full:1
1 //=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
21 def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
23 def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
24 AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
25 def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
27 def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
29 def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
31 def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
33 def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
35 def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
37 def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
39 def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
41 def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
42 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
43 def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
44 AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
45 def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
47 def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
49 def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
51 def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
52 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
54 def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
57 def HasEL3 : Predicate<"Subtarget->hasEL3()">,
60 def HasVH : Predicate<"Subtarget->hasVH()">,
63 def HasLOR : Predicate<"Subtarget->hasLOR()">,
66 def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
69 def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
70 AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
72 def HasJS : Predicate<"Subtarget->hasJS()">,
75 def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
78 def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
81 def HasNV : Predicate<"Subtarget->hasNV()">,
84 def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
87 def HasDIT : Predicate<"Subtarget->hasDIT()">,
90 def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
93 def HasAM : Predicate<"Subtarget->hasAM()">,
96 def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
99 def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
100 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
102 def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
105 def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
106 AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
108 def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
109 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
110 def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
112 def HasSM4 : Predicate<"Subtarget->hasSM4()">,
114 def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
116 def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
118 def HasAES : Predicate<"Subtarget->hasAES()">,
120 def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
122 def HasCRC : Predicate<"Subtarget->hasCRC()">,
124 def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
126 def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
127 def HasLSE : Predicate<"Subtarget->hasLSE()">,
129 def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
130 def HasRAS : Predicate<"Subtarget->hasRAS()">,
132 def HasRDM : Predicate<"Subtarget->hasRDM()">,
134 def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
136 def HasNoFullFP16 : Predicate<"!Subtarget->hasFullFP16()">;
137 def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
139 def HasSPE : Predicate<"Subtarget->hasSPE()">,
141 def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
143 "fuse-aes">;
144 def HasSVE : Predicate<"Subtarget->isSVEAvailable()">,
146 def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
148 def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
150 def HasSVE2AES : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2AES()">,
151 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
152 def HasSVE2SM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
153 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
154 def HasSVE2SHA3 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
155 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
156 def HasSVE2BitPerm : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2BitPerm()">,
157 … AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
158 def HasB16B16 : Predicate<"Subtarget->hasB16B16()">,
161 : Predicate<"Subtarget->hasSME()">,
163 def HasSME : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME()">,
165 def HasSMEF64F64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">,
166 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
167 def HasSMEF16F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">,
168 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
169 def HasSMEFA64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">,
170 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
171 def HasSMEI16I64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">,
172 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
174 : Predicate<"Subtarget->hasSME2()">,
176 def HasSME2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">,
178 def HasSME2p1 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">,
180 def HasFP8 : Predicate<"Subtarget->hasFP8()">,
182 def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
184 def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
186 def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
187 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
190 "ssve-fp8fma or (sve2 and fp8fma)">;
191 def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
193 def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
194 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
197 "ssve-fp8dot2 or (sve2 and fp8dot2)">;
198 def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
200 def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
201 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
204 "ssve-fp8dot4 or (sve2 and fp8dot4)">;
205 def HasLUT : Predicate<"Subtarget->hasLUT()">,
207 def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
208 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
209 def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
210 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
211 def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
212 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
217 : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
221 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
225 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
229 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
232 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
235 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
239 …: Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
241 "sme-f16f16 or sme-f8f16">;
246 : Predicate<"Subtarget->hasNEON()">,
248 def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
250 def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
252 def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
254 def HasSB : Predicate<"Subtarget->hasSB()">,
256 def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
258 def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
260 def HasBTI : Predicate<"Subtarget->hasBTI()">,
262 def HasMTE : Predicate<"Subtarget->hasMTE()">,
264 def HasTME : Predicate<"Subtarget->hasTME()">,
266 def HasETE : Predicate<"Subtarget->hasETE()">,
267 AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
268 def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
270 def HasBF16 : Predicate<"Subtarget->hasBF16()">,
272 def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">;
273 def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
275 def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
277 def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
279 def HasFPAC : Predicate<"Subtarget->hasFPAC()">,
281 def HasXS : Predicate<"Subtarget->hasXS()">,
283 def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
285 def HasLS64 : Predicate<"Subtarget->hasLS64()">,
287 def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
289 def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
290 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
291 def HasHBC : Predicate<"Subtarget->hasHBC()">,
293 def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
295 def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
297 def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
299 def HasITE : Predicate<"Subtarget->hasITE()">,
301 def HasTHE : Predicate<"Subtarget->hasTHE()">,
303 def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
305 def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
307 def HasD128 : Predicate<"Subtarget->hasD128()">,
309 def HasCHK : Predicate<"Subtarget->hasCHK()">,
311 def HasGCS : Predicate<"Subtarget->hasGCS()">,
313 def HasCPA : Predicate<"Subtarget->hasCPA()">,
315 def IsLE : Predicate<"Subtarget->isLittleEndian()">;
316 def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
317 def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
319 : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
321 : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
327 def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
329 def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
331 def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
334 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
335 SDTCisInt<1>]>>;
338 //===----------------------------------------------------------------------===//
339 // AArch64-specific DAG Nodes.
342 // SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
346 SDTCisInt<0>, SDTCisVT<1, i32>]>;
348 // SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
349 def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
350 [SDTCisSameAs<0, 1>,
355 // SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
360 SDTCisVT<1, i32>,
364 [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
366 def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
367 def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
371 def SDT_AArch64CSel : SDTypeProfile<1, 4,
372 [SDTCisSameAs<0, 1>,
376 def SDT_AArch64CCMP : SDTypeProfile<1, 5,
378 SDTCisInt<1>,
379 SDTCisSameAs<1, 2>,
383 def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
385 SDTCisFP<1>,
386 SDTCisSameAs<1, 2>,
392 SDTCisSameAs<0, 1>]>;
393 def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
394 def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
395 def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
396 def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
397 SDTCisSameAs<0, 1>,
399 def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
400 def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
401 def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
403 def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
404 def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
406 def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
407 def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
410 def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
411 SDTCisSameAs<0,1>,
414 def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
415 def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
416 def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
417 def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
419 def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
423 def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
425 def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
427 def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
428 SDTCisPtrTy<1>]>;
430 def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
432 def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
433 def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
434 def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
435 def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
436 def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
437 def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
448 def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
451 def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
452 [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
453 SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
454 SDTCisSameAs<1, 4>]>;
456 def SDT_AArch64TBL : SDTypeProfile<1, 2, [
457 SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
460 // non-extending masked load fragment.
464 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
465 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
466 !cast<MaskedLoadSDNode>(N)->isNonTemporal();
472 return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
473 cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
474 cast<MaskedLoadSDNode>(N)->isUnindexed();
479 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
484 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
489 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
495 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
496 cast<MaskedLoadSDNode>(N)->isUnindexed();
501 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
506 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
511 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
517 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
518 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
519 cast<MaskedLoadSDNode>(N)->isNonTemporal();
522 // non-truncating masked store fragment.
526 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
527 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
528 !cast<MaskedStoreSDNode>(N)->isNonTemporal();
534 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
535 cast<MaskedStoreSDNode>(N)->isUnindexed();
540 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
545 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
550 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
556 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
557 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
558 cast<MaskedStoreSDNode>(N)->isNonTemporal();
567 bool Signed = MGS->isIndexSigned() ||
568 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
569 return Signed && MGS->isIndexScaled();
576 bool Signed = MGS->isIndexSigned() ||
577 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
578 return Signed && !MGS->isIndexScaled();
585 bool Signed = MGS->isIndexSigned() ||
586 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
587 return !Signed && MGS->isIndexScaled();
594 bool Signed = MGS->isIndexSigned() ||
595 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
596 return !Signed && !MGS->isIndexScaled();
613 // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
615 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
616 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
619 // top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
621 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
622 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
625 // topbitsallzero - Return true if all bits except the lowest bit are known zero
627 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
628 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
631 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
632 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
642 SDTCisVT<1, i32> ]>,
646 SDTCisVT<1, i32> ]>,
649 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
654 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
659 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
664 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
669 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
670 SDTCisVT<1, i32>,
684 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
685 SDTCisPtrTy<1>,
768 return N->getFlags().hasExact();
840 def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
841 SDTCisSameAs<1, 2>]>;
912 def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
918 def SDT_AArch64unpk : SDTypeProfile<1, 1, [
919 SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
937 SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
941 SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
944 def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
950 def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
951 [SDTCisInt<0>, SDTCisVec<1>]>, []>;
957 if (N->getOpcode() == ISD::ADD)
959 return CurDAG->isADDLike(SDValue(N,0));
968 // Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
970 return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
971 CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
974 //===----------------------------------------------------------------------===//
976 //===----------------------------------------------------------------------===//
979 // We could compute these on a per-module basis but doing so requires accessing
981 // to that (see post-commit review comments for r301750).
982 let RecomputePerFunction = 1 in {
986 def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
988 // Register restrictions for indirect tail-calls:
989 // - If branch target enforcement is enabled, indirect calls must use x16 or
992 // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address
1000 …llX16X17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->get…
1002 …ilCallX17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getI…
1003 // BTI off, PAuthLR on: Any non-callee-saved register except x16
1004 …llNotX16 : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getI…
1005 // BTI off, PAuthLR off: Any non-callee-saved register
1006 …lCallAny : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->get…
1008 def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1009 def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1015 …: Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionPro…
1022 //===----------------------------------------------------------------------===//
1024 //===----------------------------------------------------------------------===//
1026 //===----------------------------------------------------------------------===//
1028 let hasSideEffects = 1, isCodeGenOnly = 1 in {
1043 // stack-clash protection is enabled.
1051 // stack-clash protection is enabled.
1058 // when stack-clash protection is enabled.
1059 let usesCustomInserter = 1 in
1066 } // hasSideEffects = 1, isCodeGenOnly = 1
1068 let isReMaterializable = 1, isCodeGenOnly = 1 in {
1131 // In general these get lowered into a sequence of three 4-byte instructions.
1132 // 32-bit jump table destination is actually only 2 instructions since we can
1133 // use the table itself as a PC-relative base. But optimization occurs after
1136 isNotDuplicable = 1 in {
1148 // A hardened but more expensive version of jump-table dispatch.
1151 // a plain BR) in a single non-attackable sequence.
1154 // mean that the index can be attacker-controlled. To address that, we also do
1156 // jump-table array. When it doesn't, this branches to the first entry.
1160 // to avoid signing jump-table entries and turning them into pointers.
1163 let isNotDuplicable = 1 in
1165 let isBranch = 1;
1166 let isTerminator = 1;
1167 let isIndirectBranch = 1;
1168 let isBarrier = 1;
1169 let isNotDuplicable = 1;
1175 // Space-consuming pseudo to aid testing of placement and reachability
1179 let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
1184 let hasSideEffects = 1, isCodeGenOnly = 1 in {
1193 let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
1194 // This gets lowered to a pair of 4-byte instructions.
1198 // This gets lowered to a 4-byte instruction.
1204 //===----------------------------------------------------------------------===//
1206 //===----------------------------------------------------------------------===//
1231 let mayLoad = 1, mayStore = 1 in
1253 let CRm{1-0} = 0b11;
1254 let Inst{9-8} = 0b10;
1263 // Branch Record Buffer two-word mnemonic instructions
1266 let Inst{31-8} = 0b110101010000100101110010;
1267 let Inst{7-5} = op2;
1280 // ARMv9.4-A Guarded Control Stack
1283 let Inst{20-8} = 0b0100001110111;
1284 let Inst{7-5} = op2;
1294 let Inst{20-19} = 0b01;
1295 let Inst{18-16} = op1;
1296 let Inst{15-8} = 0b01110111;
1297 let Inst{7-5} = op2;
1299 let hasSideEffects = 1;
1302 let mayStore = 1, mayLoad = 1 in
1304 let mayStore = 1 in
1309 : RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, "\t$Rt", pattern> {
1310 let Inst{20-19} = 0b01;
1311 let Inst{18-16} = op1;
1312 let Inst{15-8} = 0b01110111;
1313 let Inst{7-5} = op2;
1315 let hasSideEffects = 1;
1321 let mayStore = 1, mayLoad = 1 in
1323 // FIXME: mayStore = 1 only needed to match the intrinsic definition
1324 let mayStore = 1, mayLoad = 1 in
1330 def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
1339 def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
1345 let Inst{31-15} = 0b11011001000111110;
1346 let Inst{14-12} = op;
1347 let Inst{11-10} = 0b11;
1348 let Inst{9-5} = Rn;
1349 let Inst{4-0} = Rt;
1355 // ARMv8.2-A Dot Product
1358 defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
1360 defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
1363 // ARMv8.6-A BFloat
1365 defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
1369 def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1371 def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1378 // Vector-scalar BFDOT:
1379 // The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1380 // register (the instruction uses a single 32-bit lane from it), so the pattern
1405 def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1406 def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
1407 defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
1408 defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
1429 def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
1436 // ARMv8.2-A FP16 Fused Multiply-Add Long
1438 defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
1439 defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
1440 defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
1441 defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
1444 defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
1445 defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
1448 // Armv8.2-A Crypto extensions
1514 …128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
1565 // v8.3a complex add and multiply-accumulate. No predicate here, that is done
1567 defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
1569 defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
1571 defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
1577 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
1581 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
1588 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
1593 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
1601 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
1612 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
1646 let isAuthenticated = 1 in {
1654 let isAuthenticated = 1 in {
1662 let isAuthenticated = 1 in {
1708 def : InstAlias<"paciaz", (PACIAZ), 1>;
1709 def : InstAlias<"pacibz", (PACIBZ), 1>;
1710 def : InstAlias<"autiaz", (AUTIAZ), 1>;
1711 def : InstAlias<"autibz", (AUTIBZ), 1>;
1712 def : InstAlias<"paciasp", (PACIASP), 1>;
1713 def : InstAlias<"pacibsp", (PACIBSP), 1>;
1714 def : InstAlias<"autiasp", (AUTIASP), 1>;
1715 def : InstAlias<"autibsp", (AUTIBSP), 1>;
1716 def : InstAlias<"pacia1716", (PACIA1716), 1>;
1717 def : InstAlias<"pacib1716", (PACIB1716), 1>;
1718 def : InstAlias<"autia1716", (AUTIA1716), 1>;
1719 def : InstAlias<"autib1716", (AUTIB1716), 1>;
1720 def : InstAlias<"xpaclri", (XPACLRI), 1>;
1739 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;
1741 def XPACD : ClearAuth<1, "xpacd">;
1748 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
1750 def BRAB : AuthBranchTwoOperands<0, 1, "brab">;
1752 let isCall = 1, Defs = [LR], Uses = [SP] in {
1753 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
1754 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
1757 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
1759 def BRABZ : AuthOneOperand<0b000, 1, "brabz">;
1761 let isCall = 1, Defs = [LR], Uses = [SP] in {
1763 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
1776 let isCodeGenOnly = 1;
1777 let hasSideEffects = 1;
1780 let isCall = 1;
1792 let isCodeGenOnly = 1;
1793 let isCall = 1;
1803 let isCodeGenOnly = 1;
1804 let hasNoSchedulingInfo = 1;
1805 let hasSideEffects = 1;
1808 let isBranch = 1;
1809 let isTerminator = 1;
1810 let isBarrier = 1;
1811 let isIndirectBranch = 1;
1816 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1818 def RETAB : AuthReturn<0b010, 1, "retab">;
1820 def ERETAB : AuthReturn<0b100, 1, "eretab">;
1824 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
1831 let isCodeGenOnly = 1;
1832 let hasSideEffects = 1;
1840 // AUT and re-PAC a value, using different keys/data.
1848 let isCodeGenOnly = 1;
1849 let hasSideEffects = 1;
1862 let isReMaterializable = 1;
1863 let isCodeGenOnly = 1;
1873 let isReMaterializable = 1;
1874 let isCodeGenOnly = 1;
1884 let isReMaterializable = 1;
1885 let isCodeGenOnly = 1;
1890 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
1916 // v9.5-A pointer authentication extensions
1919 // disassembling if we don't have the pauth-lr feature.
1945 let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1953 def : InstAlias<"pacm", (PACM), 1>;
1969 let Inst{20-5} = 0b0000001000000000;
1972 def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
1981 let Inst{18-16} = 0b000;
1982 let Inst{11-8} = 0b0000;
1983 let Unpredictable{11-8} = 0b1111;
1984 let Inst{7-5} = 0b001;
1988 let Inst{18-16} = 0b000;
1989 let Inst{11-8} = 0b0000;
1990 let Unpredictable{11-8} = 0b1111;
1991 let Inst{7-5} = 0b010;
1996 // Armv8.5-A speculation barrier
1998 let Inst{20-5} = 0b0001100110000111;
1999 let Unpredictable{11-8} = 0b1111;
2001 let hasSideEffects = 1;
2023 // This gets lowered into a 24-byte instruction sequence
2085 def SYSLxt : SystemLXtI<1, "sysl">;
2108 //===----------------------------------------------------------------------===//
2110 //===----------------------------------------------------------------------===//
2185 let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
2186 isAsCheapAsAMove = 1 in {
2201 // If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
2215 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
2222 // The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
2230 return CurDAG->getTargetConstant(
2231 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
2235 return CurDAG->getTargetConstant(
2236 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
2277 //===----------------------------------------------------------------------===//
2279 //===----------------------------------------------------------------------===//
2283 defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
2292 defm SUB : AddSub<1, "sub", "add">;
2304 defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
2307 return N->getOpcode() == ISD::CopyFromReg &&
2308 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
2324 let AddedComplexity = 1 in {
2333 // Because of the immediate format for add/sub-imm instructions, the
2334 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2336 let AddedComplexity = 1 in {
2347 // Because of the immediate format for add/sub-imm instructions, the
2348 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2350 let AddedComplexity = 1 in {
2378 defm SDIV : Div<1, "sdiv", sdiv>;
2400 // Multiply-add
2403 defm MSUB : MulAccum<1, "msub">;
2422 def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
2424 def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
2528 // Multiply-high
2536 def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
2538 def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
2539 def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
2540 def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
2541 def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
2543 // v8.1 atomic CAS
2545 defm CASA : CompareAndSwap<1, 0, "a">;
2546 defm CASL : CompareAndSwap<0, 1, "l">;
2547 defm CASAL : CompareAndSwap<1, 1, "al">;
2549 // v8.1 atomic CASP
2551 defm CASPA : CompareAndSwapPair<1, 0, "a">;
2552 defm CASPL : CompareAndSwapPair<0, 1, "l">;
2553 defm CASPAL : CompareAndSwapPair<1, 1, "al">;
2555 // v8.1 atomic SWP
2557 defm SWPA : Swap<1, 0, "a">;
2558 defm SWPL : Swap<0, 1, "l">;
2559 defm SWPAL : Swap<1, 1, "al">;
2561 // v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
2563 defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">;
2564 defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">;
2565 defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
2568 defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">;
2569 defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">;
2570 defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
2573 defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">;
2574 defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">;
2575 defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
2578 defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">;
2579 defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">;
2580 defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
2583 defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">;
2584 defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">;
2585 defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
2588 defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">;
2589 defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">;
2590 defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
2593 defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">;
2594 defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">;
2595 defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
2598 defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">;
2599 defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">;
2600 defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
2602 // v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register)" when Rt=XZR
2620 let isNotDuplicable = 1;
2623 def SUBG : AddSubG<1, "subg", null_frag>;
2625 def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
2628 def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
2641 def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
2643 def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
2691 let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in {
2717 //===----------------------------------------------------------------------===//
2719 //===----------------------------------------------------------------------===//
2738 defm BICS : LogicalRegS<0b11, 1, "bics",
2741 defm BIC : LogicalReg<0b00, 1, "bic",
2743 defm EON : LogicalReg<0b10, 1, "eon",
2746 defm ORN : LogicalReg<0b01, 1, "orn",
2786 //===----------------------------------------------------------------------===//
2788 //===----------------------------------------------------------------------===//
2802 def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
2803 (i32 1))),
2805 def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
2806 (i64 1))),
2826 // Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2834 //===----------------------------------------------------------------------===//
2836 //===----------------------------------------------------------------------===//
2849 //===----------------------------------------------------------------------===//
2851 //===----------------------------------------------------------------------===//
2859 uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
2860 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2864 uint64_t enc = 31 - N->getZExtValue();
2865 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2868 // min(7, 31 - shift_amt)
2870 uint64_t enc = 31 - N->getZExtValue();
2872 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2875 // min(15, 31 - shift_amt)
2877 uint64_t enc = 31 - N->getZExtValue();
2879 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2883 uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
2884 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2888 uint64_t enc = 63 - N->getZExtValue();
2889 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2892 // min(7, 63 - shift_amt)
2894 uint64_t enc = 63 - N->getZExtValue();
2896 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2899 // min(15, 63 - shift_amt)
2901 uint64_t enc = 63 - N->getZExtValue();
2903 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2906 // min(31, 63 - shift_amt)
2908 uint64_t enc = 63 - N->getZExtValue();
2910 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2952 //===----------------------------------------------------------------------===//
2954 //===----------------------------------------------------------------------===//
2956 defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
2958 //===----------------------------------------------------------------------===//
2960 //===----------------------------------------------------------------------===//
2963 def inc : PatFrag<(ops node:$in), (add_and_or_is_add node:$in, 1)>;
2965 defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
2966 defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
2981 def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
2983 def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
2985 def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
2987 def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
2989 def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
2991 def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
2993 def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2995 def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2997 def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2999 def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
3001 def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
3003 def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
3006 def : Pat<(add_and_or_is_add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3008 def : Pat<(add_and_or_is_add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))…
3011 def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3013 def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
3015 def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3018 def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3020 def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
3022 def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3053 //===----------------------------------------------------------------------===//
3054 // PC-relative instructions.
3055 //===----------------------------------------------------------------------===//
3056 let isReMaterializable = 1 in {
3062 def ADRP : ADRI<1, "adrp", adrplabel,
3064 } // isReMaterializable = 1
3075 //===----------------------------------------------------------------------===//
3077 //===----------------------------------------------------------------------===//
3079 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
3083 } // isReturn = 1, isTerminator = 1, isBarrier = 1
3088 let isCall = 1, Defs = [LR], Uses = [SP] in {
3121 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
3125 // Create a separate pseudo-instruction for codegen to use so that we don't
3130 let isTerminator = 1;
3131 let isBarrier = 1;
3132 let isReturn = 1;
3135 // This is a directive-like pseudo-instruction. The purpose is to insert an
3138 let hasSideEffects = 1 in
3154 let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
3155 isCodeGenOnly = 1 in
3163 //===----------------------------------------------------------------------===//
3165 //===----------------------------------------------------------------------===//
3168 // Armv8.8-A variant form which hints to the branch predictor that
3171 def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
3173 //===----------------------------------------------------------------------===//
3174 // Compare-and-branch instructions.
3175 //===----------------------------------------------------------------------===//
3177 defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
3179 //===----------------------------------------------------------------------===//
3180 // Test-bit-and-branch instructions.
3181 //===----------------------------------------------------------------------===//
3183 defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
3185 //===----------------------------------------------------------------------===//
3187 //===----------------------------------------------------------------------===//
3188 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
3192 let isCall = 1, Defs = [LR], Uses = [SP] in {
3193 def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
3197 //===----------------------------------------------------------------------===//
3199 //===----------------------------------------------------------------------===//
3200 let isTrap = 1 in {
3220 //===----------------------------------------------------------------------===//
3222 //===----------------------------------------------------------------------===//
3228 defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
3229 defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
3230 defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
3235 // Pair (pre-indexed)
3239 def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
3240 def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
3241 def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
3246 // Pair (post-indexed)
3250 def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
3251 def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
3252 def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
3262 defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
3263 defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
3264 defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
3272 //---
3274 //---
3282 // Floating-point
3284 defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>;
3285 defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
3286 defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
3287 defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
3288 defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
3291 // Load sign-extended half-word
3295 // Load sign-extended byte
3299 // Load sign-extended word
3302 // Pre-fetch.
3371 // We must do vector loads with LD1 in big-endian.
3385 // We must do vector loads with LD1 in big-endian.
3397 // zextload -> i64
3416 // zextloadi1 -> zextloadi8
3419 // extload -> zextload
3424 // extloadi1 -> zextloadi8
3429 // zextload -> i64
3441 // extload -> zextload
3446 // zextloadi1 -> zextloadi8
3450 //---
3452 //---
3460 defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
3463 defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
3466 defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
3469 defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
3472 defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
3519 // We must use LD1 to perform vector loads in big-endian.
3540 // We must use LD1 to perform vector loads in big-endian.
3569 // zextload -> i64
3575 // zextloadi1 -> zextloadi8
3581 // extload -> zextload
3597 // load sign-extended half-word
3607 // load sign-extended byte
3617 // load sign-extended word
3623 // load zero-extended word
3627 // Pre-fetch.
3635 //---
3640 const DataLayout &DL = MF->getDataLayout();
3641 Align Align = G->getGlobal()->getPointerAlignment(DL);
3642 return Align >= 4 && G->getOffset() % 4 == 0;
3645 return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
3654 def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
3656 def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
3658 def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
3662 // load sign-extended word
3675 //---
3684 defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
3687 defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
3690 defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
3693 defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
3696 defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
3750 // anyext -> zext
3782 //---
3834 // zextload -> i64
3840 // load sign-extended half-word
3850 // load sign-extended byte
3860 // load sign-extended word
3897 // Half-vector patterns
3933 // Pre-fetch.
3938 //---
3946 // load sign-extended half-word
3950 // load sign-extended byte
3954 // load sign-extended word
3957 //---
3958 // (immediate pre-indexed)
3962 def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
3963 def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
3964 def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
3965 def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
3966 def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
3969 // load sign-extended half-word
3973 // load sign-extended byte
3977 // load zero-extended byte
3981 // load sign-extended word
3984 //---
3985 // (immediate post-indexed)
3989 def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
3990 def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
3991 def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
3992 def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
3993 def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
3996 // load sign-extended half-word
4000 // load sign-extended byte
4004 // load zero-extended byte
4008 // load sign-extended word
4011 //===----------------------------------------------------------------------===//
4013 //===----------------------------------------------------------------------===//
4016 // FIXME: Use dedicated range-checked addressing mode operand here.
4020 defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
4021 defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
4022 defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
4025 // Pair (pre-indexed)
4029 def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
4030 def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
4031 def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
4034 // Pair (post-indexed)
4038 def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
4039 def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
4040 def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
4047 defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
4048 defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
4049 defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
4059 //---
4069 // Floating-point
4071 defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>;
4072 defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
4073 defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
4074 defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
4075 defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
4124 // We must use ST1 to store vectors in big-endian.
4138 // We must use ST1 to store vectors in big-endian.
4177 //---
4186 defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
4189 defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
4192 defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
4195 defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
4198 defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
4226 // We must use ST1 to store vectors in big-endian.
4253 // We must use ST1 to store vectors in big-endian.
4313 //---
4322 defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
4325 defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
4328 defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
4331 defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
4334 defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
4377 // We must use ST1 to store vectors in big-endian.
4403 // We must use ST1 to store vectors in big-endian.
4460 //---
4484 //---
4492 //---
4493 // (immediate pre-indexed)
4497 def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>;
4498 def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
4499 def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
4500 def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
4501 def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
4548 //---
4549 // (immediate post-indexed)
4553 def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>;
4554 def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
4555 def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
4556 def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
4557 def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
4611 //===----------------------------------------------------------------------===//
4613 //===----------------------------------------------------------------------===//
4615 def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
4616 def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
4617 def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
4618 def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
4620 def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
4621 def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
4622 def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
4623 def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
4625 def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
4626 def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
4627 def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
4628 def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
4630 def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
4631 def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
4632 def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
4633 def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
4639 LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not
4641 case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
4650 def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
4651 def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
4652 def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
4653 def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
4660 def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
4661 def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
4663 def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
4664 def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
4666 def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
4667 def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
4669 def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
4670 def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
4673 // v8.1a "Limited Order Region" extension load-acquire instructions
4674 def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
4675 def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
4676 def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
4677 def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
4679 // v8.1a "Limited Order Region" extension store-release instructions
4680 def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
4681 def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
4682 def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
4683 def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
4692 //===----------------------------------------------------------------------===//
4694 //===----------------------------------------------------------------------===//
4833 //===----------------------------------------------------------------------===//
4835 //===----------------------------------------------------------------------===//
4838 defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
4868 //===----------------------------------------------------------------------===//
4870 //===----------------------------------------------------------------------===//
4875 let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1,
4905 //===----------------------------------------------------------------------===//
4907 //===----------------------------------------------------------------------===//
4920 // Pattern for bf16 -> fp32.
4923 // Pattern for bf16 -> fp64.
4927 //===----------------------------------------------------------------------===//
4929 //===----------------------------------------------------------------------===//
4954 // Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
4989 //===----------------------------------------------------------------------===//
4991 //===----------------------------------------------------------------------===//
5052 //===----------------------------------------------------------------------===//
5054 //===----------------------------------------------------------------------===//
5057 defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
5059 defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
5061 defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
5070 // Here we handle first -(a + b*c) for FNMADD:
5082 // Now it's time for "(-a) + (-b)*c"
5094 //===----------------------------------------------------------------------===//
5096 //===----------------------------------------------------------------------===//
5098 defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
5101 //===----------------------------------------------------------------------===//
5103 //===----------------------------------------------------------------------===//
5105 defm FCCMPE : FPCondComparison<1, "fccmpe">;
5108 //===----------------------------------------------------------------------===//
5110 //===----------------------------------------------------------------------===//
5119 // pseudo-instruction since the eventual code will need to introduce basic
5128 let usesCustomInserter = 1;
5129 let hasNoSchedulingInfo = 1;
5132 //===----------------------------------------------------------------------===//
5134 //===----------------------------------------------------------------------===//
5135 let isPseudo = 1 in {
5159 //===----------------------------------------------------------------------===//
5160 let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
5161 isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
5163 let usesCustomInserter = 1 in
5169 let isPseudo = 1 in {
5176 //===----------------------------------------------------------------------===//
5178 //===----------------------------------------------------------------------===//
5180 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
5189 //===----------------------------------------------------------------------===//
5191 //===----------------------------------------------------------------------===//
5193 defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
5195 // Match UABDL in log2-shuffle patterns.
5217 defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
5219 defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
5221 defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
5224 defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
5241 defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
5242 defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
5243 defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
5244 defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
5245 defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
5247 defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
5264 defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
5266 defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
5281 defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
5282 defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
5283 defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
5285 defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
5286 defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
5318 defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
5319 defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
5320 defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
5321 defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
5324 defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
5325 defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
5326 defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
5330 defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
5331 defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
5332 defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
5335 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
5336 defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
5337 defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
5339 defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
5340 // Aliases for MVN -> NOT.
5355 defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
5357 defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
5365 defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5367 defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
5369 defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
5371 defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
5372 defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
5373 defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
5374 defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
5375 defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
5376 defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
5420 // trunc(umin(X, 255)) -> UQXTN v8i8
5423 // trunc(umin(X, 65535)) -> UQXTN v4i16
5426 // trunc(smin(smax(X, -128), 127)) -> SQXTN
5434 // trunc(smin(smax(X, -32768), 32767)) -> SQXTN
5443 // concat_vectors(Vd, trunc(umin(Vn, 255))) -> UQXTN2(Vd, Vn)
5448 // concat_vectors(Vd, trunc(umin(Vn, 65535))) -> UQXTN2(Vd, Vn)
5454 // concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
5467 // concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
5492 //===----------------------------------------------------------------------===//
5494 //===----------------------------------------------------------------------===//
5498 defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
5501 defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
5502 defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
5507 defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
5516 defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
5517 defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
5518 defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
5521 defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5522 defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5523 defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
5524 defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
5526 defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
5528 defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
5529 defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
5530 defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
5531 defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
5537 defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
5541 defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
5543 defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
5544 defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;
5548 defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
5551 defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
5563 defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
5570 defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
5571 defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
5573 defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
5574 defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
5575 defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
5576 defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
5577 defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
5578 defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
5579 defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
5580 defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
5581 defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
5582 defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
5583 defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5584 defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
5585 defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
5586 defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
5587 defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
5589 defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
5601 defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
5610 defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
5611 defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit">;
5612 defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
5726 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
5735 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
5740 def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
5904 //===----------------------------------------------------------------------===//
5906 //===----------------------------------------------------------------------===//
5909 defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
5912 defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
5913 defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
5915 defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
5925 defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
5927 defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
5930 defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5931 defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5934 defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStr…
5937 defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
5943 defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
5944 defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
5945 defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
5946 defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
5947 defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
5948 defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
5949 defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
5951 defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
5952 defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
5994 //===----------------------------------------------------------------------===//
5996 //===----------------------------------------------------------------------===//
6011 //===----------------------------------------------------------------------===//
6013 //===----------------------------------------------------------------------===//
6017 defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
6019 defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
6021 defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
6022 defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
6023 defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
6024 defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
6025 defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
6027 defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
6029 defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
6031 defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
6032 defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
6033 defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
6035 defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
6036 defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
6037 defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
6038 defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
6039 defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
6040 defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
6044 defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
6046 defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
6049 defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
6050 defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
6051 defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
6156 // Some float -> int -> float conversion patterns for which we want to keep the
6158 // avoid more costly int <-> fp register transfers.
6176 // int -> float conversion of value in lane 0 of simd vector should use
6177 // correct cvtf variant to avoid costly fpr <-> gpr register transfers.
6190 // fp16: integer extraction from vector must be at least 32-bits to be legal.
6191 // Actual extraction result is then an in-reg sign-extension of lower 16-bits.
6197 // unsigned 32-bit extracted element is truncated to 16-bits using AND
6205 // Here are the patterns for 8 and 16-bits to float.
6206 // 8-bits -> float.
6236 // 16-bits -> float.
6247 // 32-bits are handled in target specific dag combine:
6249 // 64-bits integer to 32-bits floating point, not possible with
6253 // Here are the patterns for 8, 16, 32, and 64-bits to double.
6254 // 8-bits -> double.
6265 // 16-bits -> double.
6276 // 32-bits -> double.
6287 // 64-bits -> double are handled in target specific dag combine:
6291 //===----------------------------------------------------------------------===//
6292 // Advanced SIMD three different-sized vector instructions.
6293 //===----------------------------------------------------------------------===//
6297 defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
6298 defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
6323 defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
6325 defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
6327 defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
6329 defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
6331 defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
6333 defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
6334 defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
6336 defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
6463 //----------------------------------------------------------------------------
6465 //----------------------------------------------------------------------------
6470 return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
6477 // We use EXT to handle extract_subvector to copy the upper 64-bits of a
6478 // 128-bit vector.
6481 // A 64-bit EXT of two halves of the same 128-bit register can be done as a
6482 // single 128-bit EXT.
6487 // A 64-bit EXT of the high half of a 128-bit register can be done using a
6488 // 128-bit EXT of the whole register with an adjustment to the immediate. The
6505 defm : ExtPat<v1i64, v2i64, 1>;
6506 defm : ExtPat<v1f64, v2f64, 1>;
6508 //----------------------------------------------------------------------------
6510 //----------------------------------------------------------------------------
6524 // concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
6525 // concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
6526 // concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
6535 // trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
6536 // trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
6537 // trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
6566 //----------------------------------------------------------------------------
6568 //----------------------------------------------------------------------------
6571 defm TBX : SIMDTableLookupTied<1, "tbx">;
6585 //----------------------------------------------------------------------------
6587 //----------------------------------------------------------------------------
6593 //----------------------------------------------------------------------------
6595 //----------------------------------------------------------------------------
6599 //----------------------------------------------------------------------------
6601 //----------------------------------------------------------------------------
6607 defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
6608 defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
6611 // below, so the second operand does not matter. Re-use the first input
6659 //----------------------------------------------------------------------------
6661 //----------------------------------------------------------------------------
6663 def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
6664 def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
6665 def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
6666 def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
6667 def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
6668 def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
6669 def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
6673 def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
6675 def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
6677 def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
6679 // DUP from a 64-bit register to a 64-bit register is just a copy
6735 return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
6738 return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
6741 return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
6807 // Extracting i8 or i16 elements will have the zero-extend transformed to
7045 // vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
7057 // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
7094 (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
7097 // If the high lanes are zero we can instead emit a d->d register mov, which
7116 //----------------------------------------------------------------------------
7118 //----------------------------------------------------------------------------
7123 defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
7124 defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
7126 defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
7129 defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
7130 defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
7215 // Patterns for across-vector intrinsics, that have a node equivalent, that
7344 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7349 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7483 //------------------------------------------------------------------------------
7485 //------------------------------------------------------------------------------
7488 defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
7515 def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
7521 def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
7525 def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
7528 def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
7536 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7537 def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
7548 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7549 def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
7568 // Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
7585 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7617 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7619 def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
7622 def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
7631 def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
7639 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7640 defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
7664 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7665 def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
7668 def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
7673 //----------------------------------------------------------------------------
7675 //----------------------------------------------------------------------------
7686 // the commutativity of multiplication and the fact that (-x) * y = x * (-y).
7702 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7722 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7743 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
7744 // (DUPLANE from 64-bit would be trivial).
7755 // 2 variants for 32-bit scalar version: extract from .2s or from .4s
7769 // 1 variant for 64-bit scalar version: extract from .1d or from .2d
7782 defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
7807 defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
7808 defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
7820 defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
7822 defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
7825 defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
7827 defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
7829 defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;
7838 //----------------------------------------------------------------------------
7840 //----------------------------------------------------------------------------
7842 defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
7844 defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
7847 // Having the same base pattern for fp <--> int totally freaks it out.
7877 // Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
7925 defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
7928 defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
7930 defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
7934 defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
7936 defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">;
7945 defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
7947 defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
7948 defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
7950 defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
7951 defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
7954 defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;
7955 defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
7959 //----------------------------------------------------------------------------
7961 //----------------------------------------------------------------------------
7963 defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
7988 // X << 1 ==> X + X
7990 : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
8003 defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
8009 defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
8011 defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
8015 defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
8017 defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
8031 defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
8033 defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
8035 defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
8036 defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
8038 defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
8039 defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
8042 defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
8044 defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
8045 defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
8143 // Vector bf16 -> fp32 is implemented morally as a zext + shift.
8264 // one step: byte-to-half, half-to-word, word-to-doubleword.
8265 // SCVTF GPR -> FPR is 9 cycles.
8266 // SCVTF FPR -> FPR is 4 cycles.
8267 // (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
8268 // Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
8271 // 8-bits -> float. 2 sizes step-up.
8297 // 16-bits -> float. 1 size step-up.
8318 // 32-bits to 32-bits are handled in target specific dag combine:
8320 // 64-bits integer to 32-bits floating point, not possible with
8324 // Here are the patterns for 8, 16, 32, and 64-bits to double.
8325 // 8-bits -> double. 3 size step-up: give up.
8326 // 16-bits -> double. 2 size step.
8351 // 32-bits -> double. 1 size step-up.
8372 // 64-bits -> double are handled in target specific dag combine:
8376 //----------------------------------------------------------------------------
8377 // AdvSIMD Load-Store Structure
8378 //----------------------------------------------------------------------------
8414 //---
8415 // Single-element
8416 //---
8418 defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
8419 defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
8421 defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
8422 let mayLoad = 1, hasSideEffects = 0 in {
8427 defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>;
8428 defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>;
8429 defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>;
8430 defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>;
8435 defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>;
8436 defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>;
8437 defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>;
8438 defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>;
8531 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8534 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
8537 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8638 defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
8663 1>;
8673 let mayStore = 1, hasSideEffects = 0 in {
8674 defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
8675 defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
8676 defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
8677 defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
8682 defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
8683 defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
8684 defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
8685 defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
8693 //----------------------------------------------------------------------------
8695 //----------------------------------------------------------------------------
8698 let isCommutable = 1 in {
8745 //----------------------------------------------------------------------------
8746 // Compiler-pseudos
8747 //----------------------------------------------------------------------------
8755 // When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
8761 // containing super-reg.
8802 // bitwidth-1 bits.
8823 def : Pat<(trap), (BRK 1)>;
8827 return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
8884 // STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
8885 // is, they treat the vector as a sequence of elements to be byte-swapped.
8897 // But this is now broken - the value stored is different to the value loaded
8912 // There is also no 128-bit REV instruction. This must be synthesized with an
8916 // a) Identity conversions - vNfX <-> vNiX
8917 // b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
9506 def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
9507 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9508 def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
9509 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9510 def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
9511 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9512 def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
9513 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9515 // A 64-bit subvector insert to the first 128-bit vector position
9539 // Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
9542 (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
9545 (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
9547 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
9551 (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
9554 (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
9576 // add(uzp1(X, Y), uzp2(X, Y)) -> addp(X, Y)
9601 // Scalar 64-bit shifts in FPR64 registers.
9611 // Patterns for nontemporal/no-allocate stores.
9612 // We have to resort to tricks to turn a single-input store into a store pair,
9613 // because there is no single-input nontemporal store, only STNP.
9620 (DUPi64 FPR128:$Rt, (i64 1)),
9632 (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
9650 // Tail call return handling. These are all compiler pseudo-instructions,
9652 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
9657 // Indirect tail-call with any register allowed, used by MachineOutliner when
9664 // Indirect tail-calls with reduced register classes, needed for BTI and
9724 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
9749 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
9811 // FIXME: add SVE dot-product patterns.
9814 // Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
9816 def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
9817 def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
9823 foreach i = 0-7 in {
9875 // MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
9876 // MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
9877 def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
9883 // MOPS operations always contain three 4-byte instructions
9884 let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
9885 let mayLoad = 1 in {
9899 let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
9905 //-----------------------------------------------------------------------------
9913 //-----------------------------------------------------------------------------
9916 let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
9925 //===----------------------------===//
9927 //===----------------------------===//
9931 def : InstAlias<"clrbhb", (HINT 22), 1>;
9934 //===----------------------------------------------------------------------===//
9936 //===----------------------------------------------------------------------===//
9943 //===----------------------------------------------------------------------===//
9944 // General Data-Processing Instructions (FEAT_V94_DP)
9945 //===----------------------------------------------------------------------===//
9951 defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
9952 defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
9953 defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
9962 let Inst{2-0} = Rt{2-0};
9963 let Inst{4-3} = 0b11;
9964 let Inst{9-5} = Rn;
9965 let Inst{11-10} = 0b10;
9966 let Inst{13-12} = Rt{4-3};
9969 let Inst{20-16} = Rm;
9970 let Inst{31-21} = 0b11111000101;
9973 let hasSideEffects = 1;
9980 //===----------------------------------------------------------------------===//
9981 // 128-bit Atomics (FEAT_LSE128)
9982 //===----------------------------------------------------------------------===//
9998 //===----------------------------------------------------------------------===//
10000 //===----------------------------------------------------------------------===//
10004 …k), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
10005 …k), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback"…
10021 …p:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
10022 …p:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
10048 //===----------------------------------------------------------------------===//
10049 // 128-bit System Instructions (FEAT_SYSINSTR128)
10050 //===----------------------------------------------------------------------===//
10073 let Inst{20-19} = 0b01;
10074 let Inst{18-16} = op1;
10075 let Inst{15-12} = Cn;
10076 let Inst{11-8} = Cm;
10077 let Inst{7-5} = op2;
10078 let Inst{4-0} = 0b11111;
10085 //---
10086 // 128-bit System Registers (FEAT_SYSREG128)
10087 //---
10092 // MRRS 1101010101| 1| 1|o0| op1| Cn| Cm|op2| Rt
10093 // MSRR 1101010101| 0| 1|o0| op1| Cn| Cm|op2| Rt
10097 // MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
10098 // MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
10103 def MRRS : RtSystemI128<1,
10108 let Inst{20-5} = systemreg;
10116 let Inst{20-5} = systemreg;
10120 //===----------------------------===//
10122 //===----------------------------===//
10165 //===----------------------------------------------------------------------===//
10167 //===----------------------------------------------------------------------===//
10171 defm SUBPT : AddSubCPA<1, "subpt">;
10173 // Scalar multiply-add/subtract
10175 def MSUBPT : MulAccumCPA<1, "msubpt">;
10186 (MOVIv4i32 (i32 1), (i32 0)))),