Lines Matching +full:el3 +full:-
1 //=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
21 def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
23 def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
25 def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
27 def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
29 def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
31 def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
33 def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
35 def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
37 def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
39 def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
41 def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
42 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
43 def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
45 def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
47 def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
49 def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
51 def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
52 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
54 def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
57 def HasEL3 : Predicate<"Subtarget->hasEL3()">,
58 AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
60 def HasVH : Predicate<"Subtarget->hasVH()">,
63 def HasLOR : Predicate<"Subtarget->hasLOR()">,
66 def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
69 def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
70 AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
72 def HasJS : Predicate<"Subtarget->hasJS()">,
75 def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
78 def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
81 def HasNV : Predicate<"Subtarget->hasNV()">,
84 def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
87 def HasDIT : Predicate<"Subtarget->hasDIT()">,
90 def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
93 def HasAM : Predicate<"Subtarget->hasAM()">,
96 def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
99 def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
100 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
102 def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
105 def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
106 AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
108 def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
109 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
110 def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
112 def HasSM4 : Predicate<"Subtarget->hasSM4()">,
114 def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
116 def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
118 def HasAES : Predicate<"Subtarget->hasAES()">,
120 def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
122 def HasCRC : Predicate<"Subtarget->hasCRC()">,
124 def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
126 def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
127 def HasLSE : Predicate<"Subtarget->hasLSE()">,
129 def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
130 def HasRAS : Predicate<"Subtarget->hasRAS()">,
132 def HasRDM : Predicate<"Subtarget->hasRDM()">,
134 def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
136 def HasNoFullFP16 : Predicate<"!Subtarget->hasFullFP16()">;
137 def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
139 def HasSPE : Predicate<"Subtarget->hasSPE()">,
141 def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
143 "fuse-aes">;
144 def HasSVE : Predicate<"Subtarget->isSVEAvailable()">,
146 def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
148 def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
150 def HasSVE2AES : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2AES()">,
151 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
152 def HasSVE2SM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
153 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
154 def HasSVE2SHA3 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
155 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
156 def HasSVE2BitPerm : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2BitPerm()">,
157 … AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
158 def HasB16B16 : Predicate<"Subtarget->hasB16B16()">,
161 : Predicate<"Subtarget->hasSME()">,
163 def HasSME : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME()">,
165 def HasSMEF64F64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">,
166 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
167 def HasSMEF16F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">,
168 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
169 def HasSMEFA64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">,
170 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
171 def HasSMEI16I64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">,
172 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
174 : Predicate<"Subtarget->hasSME2()">,
176 def HasSME2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">,
178 def HasSME2p1 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">,
180 def HasFP8 : Predicate<"Subtarget->hasFP8()">,
182 def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
184 def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
186 def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
187 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
190 "ssve-fp8fma or (sve2 and fp8fma)">;
191 def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
193 def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
194 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
197 "ssve-fp8dot2 or (sve2 and fp8dot2)">;
198 def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
200 def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
201 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
204 "ssve-fp8dot4 or (sve2 and fp8dot4)">;
205 def HasLUT : Predicate<"Subtarget->hasLUT()">,
207 def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
208 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
209 def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
210 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
211 def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
212 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
217 : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
221 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
225 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
229 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
232 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
235 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
239 …: Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
241 "sme-f16f16 or sme-f8f16">;
246 : Predicate<"Subtarget->hasNEON()">,
248 def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
250 def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
252 def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
254 def HasSB : Predicate<"Subtarget->hasSB()">,
256 def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
258 def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
260 def HasBTI : Predicate<"Subtarget->hasBTI()">,
262 def HasMTE : Predicate<"Subtarget->hasMTE()">,
264 def HasTME : Predicate<"Subtarget->hasTME()">,
266 def HasETE : Predicate<"Subtarget->hasETE()">,
268 def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
270 def HasBF16 : Predicate<"Subtarget->hasBF16()">,
272 def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">;
273 def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
275 def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
277 def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
279 def HasFPAC : Predicate<"Subtarget->hasFPAC()">,
281 def HasXS : Predicate<"Subtarget->hasXS()">,
283 def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
285 def HasLS64 : Predicate<"Subtarget->hasLS64()">,
287 def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
289 def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
290 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
291 def HasHBC : Predicate<"Subtarget->hasHBC()">,
293 def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
295 def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
297 def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
299 def HasITE : Predicate<"Subtarget->hasITE()">,
301 def HasTHE : Predicate<"Subtarget->hasTHE()">,
303 def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
305 def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
307 def HasD128 : Predicate<"Subtarget->hasD128()">,
309 def HasCHK : Predicate<"Subtarget->hasCHK()">,
311 def HasGCS : Predicate<"Subtarget->hasGCS()">,
313 def HasCPA : Predicate<"Subtarget->hasCPA()">,
315 def IsLE : Predicate<"Subtarget->isLittleEndian()">;
316 def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
317 def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
319 : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
321 : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
327 def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
329 def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
331 def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
338 //===----------------------------------------------------------------------===//
339 // AArch64-specific DAG Nodes.
342 // SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
348 // SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
355 // SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
427 def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
460 // non-extending masked load fragment.
464 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
465 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
466 !cast<MaskedLoadSDNode>(N)->isNonTemporal();
472 return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
473 cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
474 cast<MaskedLoadSDNode>(N)->isUnindexed();
479 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
484 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
489 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
495 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
496 cast<MaskedLoadSDNode>(N)->isUnindexed();
501 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
506 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
511 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
517 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
518 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
519 cast<MaskedLoadSDNode>(N)->isNonTemporal();
522 // non-truncating masked store fragment.
526 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
527 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
528 !cast<MaskedStoreSDNode>(N)->isNonTemporal();
534 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
535 cast<MaskedStoreSDNode>(N)->isUnindexed();
540 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
545 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
550 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
556 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
557 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
558 cast<MaskedStoreSDNode>(N)->isNonTemporal();
567 bool Signed = MGS->isIndexSigned() ||
568 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
569 return Signed && MGS->isIndexScaled();
576 bool Signed = MGS->isIndexSigned() ||
577 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
578 return Signed && !MGS->isIndexScaled();
585 bool Signed = MGS->isIndexSigned() ||
586 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
587 return !Signed && MGS->isIndexScaled();
594 bool Signed = MGS->isIndexSigned() ||
595 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
596 return !Signed && !MGS->isIndexScaled();
613 // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
615 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
616 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
619 // top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
621 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
622 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
625 // topbitsallzero - Return true if all bits except the lowest bit are known zero
627 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
628 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
631 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
632 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
649 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
654 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
659 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
664 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
669 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
684 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
768 return N->getFlags().hasExact();
957 if (N->getOpcode() == ISD::ADD)
959 return CurDAG->isADDLike(SDValue(N,0));
968 // Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
970 return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
971 CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
974 //===----------------------------------------------------------------------===//
976 //===----------------------------------------------------------------------===//
979 // We could compute these on a per-module basis but doing so requires accessing
981 // to that (see post-commit review comments for r301750).
986 def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
988 // Register restrictions for indirect tail-calls:
989 // - If branch target enforcement is enabled, indirect calls must use x16 or
992 // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address
1000 …llX16X17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->get…
1002 …ilCallX17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getI…
1003 // BTI off, PAuthLR on: Any non-callee-saved register except x16
1004 …llNotX16 : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getI…
1005 // BTI off, PAuthLR off: Any non-callee-saved register
1006 …lCallAny : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->get…
1008 def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1009 def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1015 …: Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionPro…
1022 //===----------------------------------------------------------------------===//
1024 //===----------------------------------------------------------------------===//
1026 //===----------------------------------------------------------------------===//
1043 // stack-clash protection is enabled.
1051 // stack-clash protection is enabled.
1058 // when stack-clash protection is enabled.
1131 // In general these get lowered into a sequence of three 4-byte instructions.
1132 // 32-bit jump table destination is actually only 2 instructions since we can
1133 // use the table itself as a PC-relative base. But optimization occurs after
1148 // A hardened but more expensive version of jump-table dispatch.
1151 // a plain BR) in a single non-attackable sequence.
1154 // mean that the index can be attacker-controlled. To address that, we also do
1156 // jump-table array. When it doesn't, this branches to the first entry.
1160 // to avoid signing jump-table entries and turning them into pointers.
1175 // Space-consuming pseudo to aid testing of placement and reachability
1194 // This gets lowered to a pair of 4-byte instructions.
1198 // This gets lowered to a 4-byte instruction.
1204 //===----------------------------------------------------------------------===//
1206 //===----------------------------------------------------------------------===//
1253 let CRm{1-0} = 0b11;
1254 let Inst{9-8} = 0b10;
1263 // Branch Record Buffer two-word mnemonic instructions
1266 let Inst{31-8} = 0b110101010000100101110010;
1267 let Inst{7-5} = op2;
1280 // ARMv9.4-A Guarded Control Stack
1283 let Inst{20-8} = 0b0100001110111;
1284 let Inst{7-5} = op2;
1294 let Inst{20-19} = 0b01;
1295 let Inst{18-16} = op1;
1296 let Inst{15-8} = 0b01110111;
1297 let Inst{7-5} = op2;
1310 let Inst{20-19} = 0b01;
1311 let Inst{18-16} = op1;
1312 let Inst{15-8} = 0b01110111;
1313 let Inst{7-5} = op2;
1345 let Inst{31-15} = 0b11011001000111110;
1346 let Inst{14-12} = op;
1347 let Inst{11-10} = 0b11;
1348 let Inst{9-5} = Rn;
1349 let Inst{4-0} = Rt;
1355 // ARMv8.2-A Dot Product
1363 // ARMv8.6-A BFloat
1378 // Vector-scalar BFDOT:
1379 // The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1380 // register (the instruction uses a single 32-bit lane from it), so the pattern
1436 // ARMv8.2-A FP16 Fused Multiply-Add Long
1448 // Armv8.2-A Crypto extensions
1565 // v8.3a complex add and multiply-accumulate. No predicate here, that is done
1840 // AUT and re-PAC a value, using different keys/data.
1916 // v9.5-A pointer authentication extensions
1919 // disassembling if we don't have the pauth-lr feature.
1969 let Inst{20-5} = 0b0000001000000000;
1981 let Inst{18-16} = 0b000;
1982 let Inst{11-8} = 0b0000;
1983 let Unpredictable{11-8} = 0b1111;
1984 let Inst{7-5} = 0b001;
1988 let Inst{18-16} = 0b000;
1989 let Inst{11-8} = 0b0000;
1990 let Unpredictable{11-8} = 0b1111;
1991 let Inst{7-5} = 0b010;
1996 // Armv8.5-A speculation barrier
1998 let Inst{20-5} = 0b0001100110000111;
1999 let Unpredictable{11-8} = 0b1111;
2023 // This gets lowered into a 24-byte instruction sequence
2108 //===----------------------------------------------------------------------===//
2110 //===----------------------------------------------------------------------===//
2201 // If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
2215 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
2222 // The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
2230 return CurDAG->getTargetConstant(
2231 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
2235 return CurDAG->getTargetConstant(
2236 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
2277 //===----------------------------------------------------------------------===//
2279 //===----------------------------------------------------------------------===//
2307 return N->getOpcode() == ISD::CopyFromReg &&
2308 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
2333 // Because of the immediate format for add/sub-imm instructions, the
2334 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2347 // Because of the immediate format for add/sub-imm instructions, the
2348 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2400 // Multiply-add
2528 // Multiply-high
2717 //===----------------------------------------------------------------------===//
2719 //===----------------------------------------------------------------------===//
2786 //===----------------------------------------------------------------------===//
2788 //===----------------------------------------------------------------------===//
2826 // Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2834 //===----------------------------------------------------------------------===//
2836 //===----------------------------------------------------------------------===//
2849 //===----------------------------------------------------------------------===//
2851 //===----------------------------------------------------------------------===//
2859 uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
2860 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2864 uint64_t enc = 31 - N->getZExtValue();
2865 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2868 // min(7, 31 - shift_amt)
2870 uint64_t enc = 31 - N->getZExtValue();
2872 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2875 // min(15, 31 - shift_amt)
2877 uint64_t enc = 31 - N->getZExtValue();
2879 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2883 uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
2884 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2888 uint64_t enc = 63 - N->getZExtValue();
2889 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2892 // min(7, 63 - shift_amt)
2894 uint64_t enc = 63 - N->getZExtValue();
2896 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2899 // min(15, 63 - shift_amt)
2901 uint64_t enc = 63 - N->getZExtValue();
2903 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2906 // min(31, 63 - shift_amt)
2908 uint64_t enc = 63 - N->getZExtValue();
2910 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2952 //===----------------------------------------------------------------------===//
2954 //===----------------------------------------------------------------------===//
2958 //===----------------------------------------------------------------------===//
2960 //===----------------------------------------------------------------------===//
2993 def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2995 def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2997 def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2999 def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
3001 def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
3003 def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
3053 //===----------------------------------------------------------------------===//
3054 // PC-relative instructions.
3055 //===----------------------------------------------------------------------===//
3075 //===----------------------------------------------------------------------===//
3077 //===----------------------------------------------------------------------===//
3125 // Create a separate pseudo-instruction for codegen to use so that we don't
3135 // This is a directive-like pseudo-instruction. The purpose is to insert an
3163 //===----------------------------------------------------------------------===//
3165 //===----------------------------------------------------------------------===//
3168 // Armv8.8-A variant form which hints to the branch predictor that
3173 //===----------------------------------------------------------------------===//
3174 // Compare-and-branch instructions.
3175 //===----------------------------------------------------------------------===//
3179 //===----------------------------------------------------------------------===//
3180 // Test-bit-and-branch instructions.
3181 //===----------------------------------------------------------------------===//
3185 //===----------------------------------------------------------------------===//
3187 //===----------------------------------------------------------------------===//
3197 //===----------------------------------------------------------------------===//
3199 //===----------------------------------------------------------------------===//
3220 //===----------------------------------------------------------------------===//
3222 //===----------------------------------------------------------------------===//
3235 // Pair (pre-indexed)
3246 // Pair (post-indexed)
3272 //---
3274 //---
3282 // Floating-point
3291 // Load sign-extended half-word
3295 // Load sign-extended byte
3299 // Load sign-extended word
3302 // Pre-fetch.
3371 // We must do vector loads with LD1 in big-endian.
3385 // We must do vector loads with LD1 in big-endian.
3397 // zextload -> i64
3416 // zextloadi1 -> zextloadi8
3419 // extload -> zextload
3424 // extloadi1 -> zextloadi8
3429 // zextload -> i64
3441 // extload -> zextload
3446 // zextloadi1 -> zextloadi8
3450 //---
3452 //---
3519 // We must use LD1 to perform vector loads in big-endian.
3540 // We must use LD1 to perform vector loads in big-endian.
3569 // zextload -> i64
3575 // zextloadi1 -> zextloadi8
3581 // extload -> zextload
3597 // load sign-extended half-word
3607 // load sign-extended byte
3617 // load sign-extended word
3623 // load zero-extended word
3627 // Pre-fetch.
3635 //---
3640 const DataLayout &DL = MF->getDataLayout();
3641 Align Align = G->getGlobal()->getPointerAlignment(DL);
3642 return Align >= 4 && G->getOffset() % 4 == 0;
3645 return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
3662 // load sign-extended word
3675 //---
3750 // anyext -> zext
3782 //---
3834 // zextload -> i64
3840 // load sign-extended half-word
3850 // load sign-extended byte
3860 // load sign-extended word
3897 // Half-vector patterns
3933 // Pre-fetch.
3938 //---
3946 // load sign-extended half-word
3950 // load sign-extended byte
3954 // load sign-extended word
3957 //---
3958 // (immediate pre-indexed)
3969 // load sign-extended half-word
3973 // load sign-extended byte
3977 // load zero-extended byte
3981 // load sign-extended word
3984 //---
3985 // (immediate post-indexed)
3996 // load sign-extended half-word
4000 // load sign-extended byte
4004 // load zero-extended byte
4008 // load sign-extended word
4011 //===----------------------------------------------------------------------===//
4013 //===----------------------------------------------------------------------===//
4016 // FIXME: Use dedicated range-checked addressing mode operand here.
4025 // Pair (pre-indexed)
4034 // Pair (post-indexed)
4059 //---
4069 // Floating-point
4124 // We must use ST1 to store vectors in big-endian.
4138 // We must use ST1 to store vectors in big-endian.
4177 //---
4226 // We must use ST1 to store vectors in big-endian.
4253 // We must use ST1 to store vectors in big-endian.
4313 //---
4377 // We must use ST1 to store vectors in big-endian.
4403 // We must use ST1 to store vectors in big-endian.
4460 //---
4484 //---
4492 //---
4493 // (immediate pre-indexed)
4548 //---
4549 // (immediate post-indexed)
4611 //===----------------------------------------------------------------------===//
4613 //===----------------------------------------------------------------------===//
4639 LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not
4641 case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
4673 // v8.1a "Limited Order Region" extension load-acquire instructions
4679 // v8.1a "Limited Order Region" extension store-release instructions
4692 //===----------------------------------------------------------------------===//
4694 //===----------------------------------------------------------------------===//
4833 //===----------------------------------------------------------------------===//
4835 //===----------------------------------------------------------------------===//
4868 //===----------------------------------------------------------------------===//
4870 //===----------------------------------------------------------------------===//
4905 //===----------------------------------------------------------------------===//
4907 //===----------------------------------------------------------------------===//
4920 // Pattern for bf16 -> fp32.
4923 // Pattern for bf16 -> fp64.
4927 //===----------------------------------------------------------------------===//
4929 //===----------------------------------------------------------------------===//
4989 //===----------------------------------------------------------------------===//
4991 //===----------------------------------------------------------------------===//
5052 //===----------------------------------------------------------------------===//
5054 //===----------------------------------------------------------------------===//
5070 // Here we handle first -(a + b*c) for FNMADD:
5082 // Now it's time for "(-a) + (-b)*c"
5094 //===----------------------------------------------------------------------===//
5096 //===----------------------------------------------------------------------===//
5101 //===----------------------------------------------------------------------===//
5103 //===----------------------------------------------------------------------===//
5108 //===----------------------------------------------------------------------===//
5110 //===----------------------------------------------------------------------===//
5119 // pseudo-instruction since the eventual code will need to introduce basic
5132 //===----------------------------------------------------------------------===//
5134 //===----------------------------------------------------------------------===//
5159 //===----------------------------------------------------------------------===//
5176 //===----------------------------------------------------------------------===//
5178 //===----------------------------------------------------------------------===//
5189 //===----------------------------------------------------------------------===//
5191 //===----------------------------------------------------------------------===//
5195 // Match UABDL in log2-shuffle patterns.
5340 // Aliases for MVN -> NOT.
5420 // trunc(umin(X, 255)) -> UQXTRN v8i8
5423 // trunc(umin(X, 65535)) -> UQXTRN v4i16
5426 // trunc(smin(smax(X, -128), 127)) -> SQXTRN
5434 // trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
5443 // concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn)
5448 // concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn)
5454 // concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
5467 // concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
5492 //===----------------------------------------------------------------------===//
5494 //===----------------------------------------------------------------------===//
5904 //===----------------------------------------------------------------------===//
5906 //===----------------------------------------------------------------------===//
5994 //===----------------------------------------------------------------------===//
5996 //===----------------------------------------------------------------------===//
6011 //===----------------------------------------------------------------------===//
6013 //===----------------------------------------------------------------------===//
6156 // Some float -> int -> float conversion patterns for which we want to keep the
6158 // avoid more costly int <-> fp register transfers.
6176 // int -> float conversion of value in lane 0 of simd vector should use
6177 // correct cvtf variant to avoid costly fpr <-> gpr register transfers.
6190 // fp16: integer extraction from vector must be at least 32-bits to be legal.
6191 // Actual extraction result is then an in-reg sign-extension of lower 16-bits.
6197 // unsigned 32-bit extracted element is truncated to 16-bits using AND
6205 // Here are the patterns for 8 and 16-bits to float.
6206 // 8-bits -> float.
6236 // 16-bits -> float.
6247 // 32-bits are handled in target specific dag combine:
6249 // 64-bits integer to 32-bits floating point, not possible with
6253 // Here are the patterns for 8, 16, 32, and 64-bits to double.
6254 // 8-bits -> double.
6265 // 16-bits -> double.
6276 // 32-bits -> double.
6287 // 64-bits -> double are handled in target specific dag combine:
6291 //===----------------------------------------------------------------------===//
6292 // Advanced SIMD three different-sized vector instructions.
6293 //===----------------------------------------------------------------------===//
6463 //----------------------------------------------------------------------------
6465 //----------------------------------------------------------------------------
6470 return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
6477 // We use EXT to handle extract_subvector to copy the upper 64-bits of a
6478 // 128-bit vector.
6481 // A 64-bit EXT of two halves of the same 128-bit register can be done as a
6482 // single 128-bit EXT.
6487 // A 64-bit EXT of the high half of a 128-bit register can be done using a
6488 // 128-bit EXT of the whole register with an adjustment to the immediate. The
6508 //----------------------------------------------------------------------------
6510 //----------------------------------------------------------------------------
6524 // concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
6525 // concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
6526 // concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
6535 // trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
6536 // trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
6537 // trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
6566 //----------------------------------------------------------------------------
6568 //----------------------------------------------------------------------------
6585 //----------------------------------------------------------------------------
6587 //----------------------------------------------------------------------------
6593 //----------------------------------------------------------------------------
6595 //----------------------------------------------------------------------------
6599 //----------------------------------------------------------------------------
6601 //----------------------------------------------------------------------------
6611 // below, so the second operand does not matter. Re-use the first input
6659 //----------------------------------------------------------------------------
6661 //----------------------------------------------------------------------------
6679 // DUP from a 64-bit register to a 64-bit register is just a copy
6735 return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
6738 return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
6741 return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
6807 // Extracting i8 or i16 elements will have the zero-extend transformed to
7045 // vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
7057 // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
7097 // If the high lanes are zero we can instead emit a d->d register mov, which
7116 //----------------------------------------------------------------------------
7118 //----------------------------------------------------------------------------
7215 // Patterns for across-vector intrinsics, that have a node equivalent, that
7344 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7349 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7483 //------------------------------------------------------------------------------
7485 //------------------------------------------------------------------------------
7568 // Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
7673 //----------------------------------------------------------------------------
7675 //----------------------------------------------------------------------------
7686 // the commutativity of multiplication and the fact that (-x) * y = x * (-y).
7702 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7722 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7743 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
7744 // (DUPLANE from 64-bit would be trivial).
7755 // 2 variants for 32-bit scalar version: extract from .2s or from .4s
7769 // 1 variant for 64-bit scalar version: extract from .1d or from .2d
7838 //----------------------------------------------------------------------------
7840 //----------------------------------------------------------------------------
7847 // Having the same base pattern for fp <--> int totally freaks it out.
7877 // Patterns for FP16 intrinsics - these require a register copy to/from, as i16 is not supported.
7959 //----------------------------------------------------------------------------
7961 //----------------------------------------------------------------------------
8143 // Vector bf16 -> fp32 is implemented morally as a zext + shift.
8264 // one step: byte-to-half, half-to-word, word-to-doubleword.
8265 // SCVTF GPR -> FPR is 9 cycles.
8266 // SCVTF FPR -> FPR is 4 cycles.
8267 // (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
8268 // Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
8271 // 8-bits -> float. 2 sizes step-up.
8297 // 16-bits -> float. 1 size step-up.
8318 // 32-bits to 32-bits are handled in target specific dag combine:
8320 // 64-bits integer to 32-bits floating point, not possible with
8324 // Here are the patterns for 8, 16, 32, and 64-bits to double.
8325 // 8-bits -> double. 3 size step-up: give up.
8326 // 16-bits -> double. 2 size step-up.
8351 // 32-bits -> double. 1 size step-up.
8372 // 64-bits -> double are handled in target specific dag combine:
8376 //----------------------------------------------------------------------------
8377 // AdvSIMD Load-Store Structure
8378 //----------------------------------------------------------------------------
8414 //---
8415 // Single-element
8416 //---
8531 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8534 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
8537 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8693 //----------------------------------------------------------------------------
8695 //----------------------------------------------------------------------------
8745 //----------------------------------------------------------------------------
8746 // Compiler-pseudos
8747 //----------------------------------------------------------------------------
8755 // When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
8761 // containing super-reg.
8802 // bitwidth-1 bits.
8827 return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
8884 // STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
8885 // is, they treat the vector as a sequence of elements to be byte-swapped.
8897 // But this is now broken - the value stored is different to the value loaded
8912 // There is also no 128-bit REV instruction. This must be synthesized with an
8916 // a) Identity conversions - vNfX <-> vNiX
8917 // b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
9515 // A 64-bit subvector insert to the first 128-bit vector position
9539 // Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
9547 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
9576 // add(uzp1(X, Y), uzp2(X, Y)) -> addp(X, Y)
9601 // Scalar 64-bit shifts in FPR64 registers.
9611 // Patterns for nontemporal/no-allocate stores.
9612 // We have to resort to tricks to turn a single-input store into a store pair,
9613 // because there is no single-input nontemporal store, only STNP.
9650 // Tail call return handling. These are all compiler pseudo-instructions,
9657 // Indirect tail-call with any register allowed, used by MachineOutliner when
9664 // Indirect tail-calls with reduced register classes, needed for BTI and
9749 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
9811 // FIXME: add SVE dot-product patterns.
9814 // Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
9823 foreach i = 0-7 in {
9883 // MOPS operations always contain three 4-byte instructions
9905 //-----------------------------------------------------------------------------
9913 //-----------------------------------------------------------------------------
9925 //===----------------------------===//
9927 //===----------------------------===//
9934 //===----------------------------------------------------------------------===//
9936 //===----------------------------------------------------------------------===//
9943 //===----------------------------------------------------------------------===//
9944 // General Data-Processing Instructions (FEAT_V94_DP)
9945 //===----------------------------------------------------------------------===//
9962 let Inst{2-0} = Rt{2-0};
9963 let Inst{4-3} = 0b11;
9964 let Inst{9-5} = Rn;
9965 let Inst{11-10} = 0b10;
9966 let Inst{13-12} = Rt{4-3};
9969 let Inst{20-16} = Rm;
9970 let Inst{31-21} = 0b11111000101;
9980 //===----------------------------------------------------------------------===//
9981 // 128-bit Atomics (FEAT_LSE128)
9982 //===----------------------------------------------------------------------===//
9998 //===----------------------------------------------------------------------===//
10000 //===----------------------------------------------------------------------===//
10004 …k), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
10005 …k), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback"…
10021 …p:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
10022 …p:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
10048 //===----------------------------------------------------------------------===//
10049 // 128-bit System Instructions (FEAT_SYSINSTR128)
10050 //===----------------------------------------------------------------------===//
10073 let Inst{20-19} = 0b01;
10074 let Inst{18-16} = op1;
10075 let Inst{15-12} = Cn;
10076 let Inst{11-8} = Cm;
10077 let Inst{7-5} = op2;
10078 let Inst{4-0} = 0b11111;
10085 //---
10086 // 128-bit System Registers (FEAT_SYSREG128)
10087 //---
10108 let Inst{20-5} = systemreg;
10116 let Inst{20-5} = systemreg;
10120 //===----------------------------===//
10122 //===----------------------------===//
10165 //===----------------------------------------------------------------------===//
10167 //===----------------------------------------------------------------------===//
10173 // Scalar multiply-add/subtract