//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
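// Most predicates below pair the compiler-side Subtarget check with an
// AssemblerPredicateWithAll. Judging by its uses (the class itself is defined
// elsewhere), the "WithAll" form additionally lets the assembler accept the
// instructions when all features are force-enabled (e.g. "+all"), whereas the
// plain AssemblerPredicate used by HasV8_0a does not.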
def HasV8_0a         : Predicate<"Subtarget->hasV8_0aOps()">,
                       AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a         : Predicate<"Subtarget->hasV8_8aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a         : Predicate<"Subtarget->hasV8_9aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a         : Predicate<"Subtarget->hasV9_3aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a         : Predicate<"Subtarget->hasV9_4aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r         : Predicate<"Subtarget->hasV8_0rOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA       : Predicate<"Subtarget->hasEL2VMSA()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3           : Predicate<"Subtarget->hasEL3()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasPAuthLR       : Predicate<"Subtarget->hasPAuthLR()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum     : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4     : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI       : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                       AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO     : Predicate<"Subtarget->hasRCPC_IMMO()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                       AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                       AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                       AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                       AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                       AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                       AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                       AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC          : Predicate<"Subtarget->hasCSSC()">,
                       AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC        : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                       AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                       AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                       AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                       AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                       "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1        : Predicate<"Subtarget->hasSVE2p1()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2AES),
                       "sve2-aes">;
def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16        : Predicate<"Subtarget->hasB16B16()">,
                       AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME           : Predicate<"Subtarget->hasSME()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64     : Predicate<"Subtarget->hasSMEF64F64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16     : Predicate<"Subtarget->hasSMEF16F16()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEFA64       : Predicate<"Subtarget->hasSMEFA64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64     : Predicate<"Subtarget->hasSMEI16I64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2          : Predicate<"Subtarget->hasSME2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1        : Predicate<"Subtarget->hasSME2p1()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFPMR          : Predicate<"Subtarget->hasFPMR()">,
                       AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
def HasFP8           : Predicate<"Subtarget->hasFP8()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX      : Predicate<"Subtarget->hasFAMINMAX()">,
                       AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
def HasFP8FMA        : Predicate<"Subtarget->hasFP8FMA()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
def HasSSVE_FP8FMA   : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
                       AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
                                                  (all_of FeatureSVE2, FeatureFP8FMA)),
                                                 "ssve-fp8fma or (sve2 and fp8fma)">;
def HasFP8DOT2       : Predicate<"Subtarget->hasFP8DOT2()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
def HasSSVE_FP8DOT2  : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
                       AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
                                                  (all_of FeatureSVE2, FeatureFP8DOT2)),
                                                 "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4       : Predicate<"Subtarget->hasFP8DOT4()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4  : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
                       AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                                  (all_of FeatureSVE2, FeatureFP8DOT4)),
                                                 "ssve-fp8dot4 or (sve2 and fp8dot4)">;
def HasLUT           : Predicate<"Subtarget->hasLUT()">,
                       AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
def HasSME_LUTv2     : Predicate<"Subtarget->hasSME_LUTv2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
def HasSMEF8F16      : Predicate<"Subtarget->hasSMEF8F16()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
def HasSMEF8F32      : Predicate<"Subtarget->hasSMEF8F32()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVEorSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                                "sve2 or sme">;
def HasSVE2orSME2
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                                "sve2 or sme2">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                "neon or sme">;
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes       : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME           : Predicate<"Subtarget->hasTME()">,
                       AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE           : Predicate<"Subtarget->hasETE()">,
                       AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16          : Predicate<"Subtarget->hasBF16()">,
                       AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS            : Predicate<"Subtarget->hasXS()">,
                       AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
                       AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64          : Predicate<"Subtarget->hasLS64()">,
                       AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC           : Predicate<"Subtarget->hasHBC()">,
                       AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS          : Predicate<"Subtarget->hasMOPS()">,
                       AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB        : Predicate<"Subtarget->hasCLRBHB()">,
                       AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2      : Predicate<"Subtarget->hasSPECRES2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE           : Predicate<"Subtarget->hasITE()">,
                       AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE           : Predicate<"Subtarget->hasTHE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3         : Predicate<"Subtarget->hasRCPC3()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128        : Predicate<"Subtarget->hasLSE128()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128          : Predicate<"Subtarget->hasD128()">,
                       AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK           : Predicate<"Subtarget->hasCHK()">,
                       AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS           : Predicate<"Subtarget->hasGCS()">,
                       AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA           : Predicate<"Subtarget->hasCPA()">,
                       AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;
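// Example of the effect (illustrative): with negative-immediate conversion
// enabled, the assembler may accept "add w0, w1, #-8" and encode it as the
// equivalent "sub w0, w1, #8"; building with +no-neg-immediates disables
// these rewrites.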
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisInt<1>]>>;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
                                    [SDTCisFP<0>,
                                     SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                   SDTCisSameAs<0,1>,
                                                   SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1

// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// One operand: the variable being accessed.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
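// The PatFrags below classify masked loads and stores by extension or
// truncation kind, memory element type, and temporality, so patterns can
// select the matching predicated load/store instruction.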
// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}

defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;
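// Each defm above expands into four PatFrags, one per addressing variant;
// e.g. nonext_masked_gather produces nonext_masked_gather_signed_scaled,
// ..._signed_unscaled, ..._unsigned_scaled and ..._unsigned_unscaled.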
// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known
// zero, i.e. the value is known to be 0 or 1.
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
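// For example, a value produced by (and x, 0xffff) satisfies top16Zero, and
// an i32 setcc result (known to be 0 or 1) satisfies topbitsallzero32.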
// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                   SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                  SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;

def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;

def AArch64call_rvmarker : SDNode<"AArch64ISD::CALL_RVMARKER",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;

def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
                                        SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                         SDNPVariadic]>;

def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                  [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc           : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn>;
def AArch64sbc           : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag      : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
                                  [SDNPCommutative]>;
def AArch64sub_flag      : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag      : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
                                  [SDNPCommutative]>;
def AArch64adc_flag      : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag      : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp          : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn          : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp         : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp          : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp   : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                  [SDNPHasChain]>;
def AArch64strict_fcmpe  : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                  [SDNPHasChain]>;
def AArch64any_fcmp      : PatFrags<(ops node:$lhs, node:$rhs),
                                    [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                     (AArch64fcmp node:$lhs, node:$rhs)]>;

def AArch64dup           : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8   : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16  : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32  : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64  : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr       : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1       : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2       : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1       : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2       : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1       : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2       : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit  : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl   : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl   : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi       : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov       : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16      : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32      : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64      : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext        : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr      : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr      : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl       : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli     : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli     : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui    : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri     : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri     : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli       : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri       : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisSameAs<1, 2>]>;
def AArch64pmull : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;

def AArch64frecpe  : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps  : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv  : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv  : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv  : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv  : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv  : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv  : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;

def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
                           [(abdu node:$lhs, node:$rhs),
                            (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
                           [(abds node:$lhs, node:$rhs),
                            (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp   : PatFrags<(ops node:$src),
                               [(AArch64saddlp_n node:$src),
                                (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp    : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS),
                            [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
                             (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;
def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmax node:$Rn),
                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmin node:$Rn),
                               (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fmaximum node:$Rn),
                             (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fminimum node:$Rn),
                             (int_aarch64_neon_fminv node:$Rn)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg   : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg  : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g  : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp    : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp   : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp    : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stilp  : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp   : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;

def AArch64probedalloca
    : SDNode<"AArch64ISD::PROBED_ALLOCA",
             SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPMayStore]>;

def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
                                [(AArch64rshrnb node:$rs, node:$i),
                                 (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;

def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;

// Match an add node, and also treat an 'or' node as an 'add' if the or'ed
// operands have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
                                 [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
  if (N->getOpcode() == ISD::ADD)
    return true;
  return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
    // Only handle G_ADD for now. FIXME. build capability to compute whether
    // operands of G_OR have common bits set or not.
    return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}
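// For example, (or (shl x, 8), (and y, 0xff)) may be selected as if it were
// an add: the left operand has its low 8 bits known zero and the right
// operand has all higher bits known zero, so no bits are set in common.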
// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
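// For example, a 64-bit (mul (sext i32 a), (sext i32 b)) has at least 33
// sign bits in each operand, so it can be selected as a 32x32->64 SMULL.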
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize    : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI    : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation   : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector
      : Predicate<"!MF->getFunction().hasOptNone() || "
                  "MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || "
                  "!MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let hasSideEffects = 1, isCodeGenOnly = 1 in {
let Defs = [SP], Uses = [SP] in {
// We set Sched to an empty list because we expect these instructions to simply
// get removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                       Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                     Sched<[]>;

}

let Defs = [SP, NZCV], Uses = [SP] in {
// Probed stack allocation of a constant size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
                               (ins i64imm:$stacksize, i64imm:$fixed_offset,
                                i64imm:$scalable_offset),
                               []>,
                        Sched<[]>;

// Probed stack allocation of a variable size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC_VAR : Pseudo<(outs),
                                   (ins GPR64sp:$target),
                                   []>,
                            Sched<[]>;

// Probed stack allocations of a variable size, used for allocas of unknown size
// when stack-clash protection is enabled.
let usesCustomInserter = 1 in
def PROBED_STACKALLOC_DYN : Pseudo<(outs),
                                   (ins GPR64common:$target),
                                   [(AArch64probedalloca GPR64common:$target)]>,
                            Sched<[]>;

} // Defs = [SP, NZCV], Uses = [SP]
} // hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                                    tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                                    tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                                    tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                                    tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                                    tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                    texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                                tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// In general these get lowered into a sequence of three 4-byte instructions.
// A 32-bit jump table destination actually needs only 2 instructions, since
// we can use the table itself as a PC-relative base. But optimization occurs
// after branch relaxation, so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;

let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
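// The operand selects the encoding: plain "bti" is HINT #32, "bti c" is #34,
// "bti j" is #36 and "bti jc" is #38.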
// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
                     [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
                     [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB : CRmSystemI<barrier_op, 0b110, "isb",
                     [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm        = 0b0010;
  let Inst{12}   = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0}   = 0b11;
  let Inst{9-8}  = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL : BRBEI<0b100, "\tiall">;
def BRB_INJ  : BRBEI<0b101, "\tinj">;

}

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;


// ARMv9.4-A Guarded Control Stack
class GCSNoOp<bits<3> op2, string mnemonic>
    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
  let Inst{20-8} = 0b0100001110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}
def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;

class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
              list<dag> pattern = []>
    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;

class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
               list<dag> pattern = []>
    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>,
                   Requires<[HasGCS]>; // Rt defaults to XZR if absent
19), 0>;
def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;

def : TokenAlias<"DSYNC", "dsync">;

let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
  def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">;
}
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;

class GCSSt<string mnemonic, bits<3> op>
    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
  bits<5> Rt;
  bits<5> Rn;
  let Inst{31-15} = 0b11011001000111110;
  let Inst{14-12} = op;
  let Inst{11-10} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{4-0} = Rt;
  let Predicates = [HasGCS];
}
def GCSSTR : GCSSt<"gcsstr", 0b000>;
def GCSSTTR : GCSSt<"gcssttr", 0b001>;


// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                                    (v4bf16 V64:$Rm),
                                    (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorSME, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6-A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;

// sudot lane has a pattern where usdot is expected (there is no sudot).
// The second operand is used in the dup operation to repeat the indexed
// element.
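// For illustration (a sketch, not normative): because only the usdot
// intrinsic exists, a call with the duplicated lane in its first (unsigned)
// multiplicand position, roughly
//   usdot(acc, dup(b[idx]), a)
// is selected to the operand-swapped instruction
//   sudot v0.4s, v1.16b, v2.4b[idx]
// i.e. the signed elements of a times the unsigned indexed lane of b.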
1296class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind, 1297 string rhs_kind, RegisterOperand RegType, 1298 ValueType AccumType, ValueType InputType> 1299 : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind, 1300 lhs_kind, rhs_kind, RegType, AccumType, 1301 InputType, null_frag> { 1302 let Pattern = [(set (AccumType RegType:$dst), 1303 (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd), 1304 (InputType (bitconvert (AccumType 1305 (AArch64duplane32 (v4i32 V128:$Rm), 1306 VectorIndexS:$idx)))), 1307 (InputType RegType:$Rn))))]; 1308} 1309 1310multiclass SIMDSUDOTIndex { 1311 def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>; 1312 def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>; 1313} 1314 1315defm SUDOTlane : SIMDSUDOTIndex; 1316 1317} 1318 1319// ARMv8.2-A FP16 Fused Multiply-Add Long 1320let Predicates = [HasNEON, HasFP16FML] in { 1321defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; 1322defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>; 1323defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>; 1324defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>; 1325defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>; 1326defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>; 1327defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>; 1328defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>; 1329} 1330 1331// Armv8.2-A Crypto extensions 1332let Predicates = [HasSHA3] in { 1333def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; 1334def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">; 1335def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">; 1336def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">; 1337def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">; 1338def EOR3 : CryptoRRRR_16B<0b00, "eor3">; 1339def BCAX : CryptoRRRR_16B<0b01, "bcax">; 1340def XAR : CryptoRRRi6<"xar">; 1341 1342class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy> 1343 : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))), 1344 (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>; 1345 1346def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), 1347 (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1348 1349def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>; 1350def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>; 1351def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>; 1352 1353def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>; 1354def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>; 1355def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>; 1356def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>; 1357 1358class EOR3_pattern<ValueType VecTy> 1359 : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)), 1360 (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; 1361 1362def : EOR3_pattern<v16i8>; 1363def : EOR3_pattern<v8i16>; 1364def : EOR3_pattern<v4i32>; 1365def : EOR3_pattern<v2i64>; 1366 1367class BCAX_pattern<ValueType VecTy> 1368 : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))), 1369 (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; 1370 1371def : 
BCAX_pattern<v16i8>; 1372def : BCAX_pattern<v8i16>; 1373def : BCAX_pattern<v4i32>; 1374def : BCAX_pattern<v2i64>; 1375 1376def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>; 1377def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>; 1378def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>; 1379def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>; 1380 1381def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>; 1382def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>; 1383def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>; 1384def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>; 1385 1386def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>; 1387def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>; 1388def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>; 1389def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>; 1390 1391def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), 1392 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1393 1394def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))), 1395 (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>; 1396 1397def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))), 1398 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1399 1400} // HasSHA3 1401 1402let Predicates = [HasSM4] in { 1403def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; 1404def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">; 1405def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; 1406def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; 1407def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; 1408def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; 1409def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; 1410def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; 1411def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; 1412 1413def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))), 1414 (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>; 1415 1416class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode> 1417 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1418 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1419 1420class SM3TT_pattern<Instruction INST, Intrinsic OpNode> 1421 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )), 1422 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; 1423 1424class SM4_pattern<Instruction INST, Intrinsic OpNode> 1425 : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1426 (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1427 1428def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>; 1429def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>; 1430 1431def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>; 1432def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>; 1433def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>; 1434def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>; 1435 1436def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>; 1437def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>; 1438} // HasSM4 1439 1440let Predicates = [HasRCPC] in { 1441 // v8.3 Release Consistent Processor Consistent support, optional in v8.2. 
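// (For example, "ldapr x0, [x1]" is an acquire load like "ldar x0, [x1]",
// but with the weaker RCpc ordering guarantee, which is sufficient for
// C/C++ memory_order_acquire and can be cheaper on some implementations.)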
  def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>;
  def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>;
  def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>;
  def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
}

// v8.3a complex add and multiply-accumulate. No predicate here; that is done
// inside the multiclass, as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
                                               "fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
                                           "fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;

let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}

let Predicates = [HasComplxNum, HasNEON] in {
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
  foreach Ty = [v4f32, v2f64] in {
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
  }
}

multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
}

multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
}

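// A sketch of the rotation mapping used by the multiclasses above: the
// vcmla_rot{0,90,180,270} intrinsics select the FCMLA rotation immediate
// 0..3, which the assembler prints in degrees. For example, on v4f32
// (register assignment illustrative only):
//   int_aarch64_neon_vcmla_rot90(acc, a, b)  ->  fcmla v0.4s, v1.4s, v2.4s, #90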
1503let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1504 defm : FCMLA_PATS<v4f16, V64>; 1505 defm : FCMLA_PATS<v8f16, V128>; 1506 1507 defm : FCMLA_LANE_PATS<v4f16, V64, 1508 (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; 1509 defm : FCMLA_LANE_PATS<v8f16, V128, 1510 (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; 1511} 1512let Predicates = [HasComplxNum, HasNEON] in { 1513 defm : FCMLA_PATS<v2f32, V64>; 1514 defm : FCMLA_PATS<v4f32, V128>; 1515 defm : FCMLA_PATS<v2f64, V128>; 1516 1517 defm : FCMLA_LANE_PATS<v4f32, V128, 1518 (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; 1519} 1520 1521// v8.3a Pointer Authentication 1522// These instructions inhabit part of the hint space and so can be used for 1523// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is 1524// important for compatibility with other assemblers (e.g. GAS) when building 1525// software compatible with both CPUs that do or don't implement PA. 1526let Uses = [LR], Defs = [LR] in { 1527 def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; 1528 def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; 1529 let isAuthenticated = 1 in { 1530 def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; 1531 def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; 1532 } 1533} 1534let Uses = [LR, SP], Defs = [LR] in { 1535 def PACIASP : SystemNoOperands<0b001, "hint\t#25">; 1536 def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; 1537 let isAuthenticated = 1 in { 1538 def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; 1539 def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; 1540 } 1541} 1542let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { 1543 def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; 1544 def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; 1545 let isAuthenticated = 1 in { 1546 def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; 1547 def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; 1548 } 1549} 1550 1551let Uses = [LR], Defs = [LR], CRm = 0b0000 in { 1552 def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; 1553} 1554 1555// In order to be able to write readable assembly, LLVM should accept assembly 1556// inputs that use pointer authentication mnemonics, even with PA disabled. 1557// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1558// should not emit these mnemonics unless PA is enabled. 1559def : InstAlias<"paciaz", (PACIAZ), 0>; 1560def : InstAlias<"pacibz", (PACIBZ), 0>; 1561def : InstAlias<"autiaz", (AUTIAZ), 0>; 1562def : InstAlias<"autibz", (AUTIBZ), 0>; 1563def : InstAlias<"paciasp", (PACIASP), 0>; 1564def : InstAlias<"pacibsp", (PACIBSP), 0>; 1565def : InstAlias<"autiasp", (AUTIASP), 0>; 1566def : InstAlias<"autibsp", (AUTIBSP), 0>; 1567def : InstAlias<"pacia1716", (PACIA1716), 0>; 1568def : InstAlias<"pacib1716", (PACIB1716), 0>; 1569def : InstAlias<"autia1716", (AUTIA1716), 0>; 1570def : InstAlias<"autib1716", (AUTIB1716), 0>; 1571def : InstAlias<"xpaclri", (XPACLRI), 0>; 1572 1573// Pseudos 1574 1575let Uses = [LR, SP], Defs = [LR] in { 1576// Insertion point of LR signing code. 1577def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>; 1578// Insertion point of LR authentication code. 1579// The RET terminator of the containing machine basic block may be replaced 1580// with a combined RETA(A|B) instruction when rewriting this Pseudo. 
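// (A sketch of that rewrite: a block that would otherwise end in
//    autiasp
//    ret
// can instead be emitted as the single combined instruction
//    retaa
// when this pseudo is expanded, assuming the A-key/SP-discriminator scheme.)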
1581def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>; 1582} 1583 1584// These pointer authentication instructions require armv8.3a 1585let Predicates = [HasPAuth] in { 1586 1587 // When PA is enabled, a better mnemonic should be emitted. 1588 def : InstAlias<"paciaz", (PACIAZ), 1>; 1589 def : InstAlias<"pacibz", (PACIBZ), 1>; 1590 def : InstAlias<"autiaz", (AUTIAZ), 1>; 1591 def : InstAlias<"autibz", (AUTIBZ), 1>; 1592 def : InstAlias<"paciasp", (PACIASP), 1>; 1593 def : InstAlias<"pacibsp", (PACIBSP), 1>; 1594 def : InstAlias<"autiasp", (AUTIASP), 1>; 1595 def : InstAlias<"autibsp", (AUTIBSP), 1>; 1596 def : InstAlias<"pacia1716", (PACIA1716), 1>; 1597 def : InstAlias<"pacib1716", (PACIB1716), 1>; 1598 def : InstAlias<"autia1716", (AUTIA1716), 1>; 1599 def : InstAlias<"autib1716", (AUTIB1716), 1>; 1600 def : InstAlias<"xpaclri", (XPACLRI), 1>; 1601 1602 multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm, 1603 SDPatternOperator op> { 1604 def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>; 1605 def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>; 1606 def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>; 1607 def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>; 1608 def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>; 1609 def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>; 1610 def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>; 1611 def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>; 1612 } 1613 1614 defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; 1615 defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; 1616 1617 def XPACI : ClearAuth<0, "xpaci">; 1618 def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>; 1619 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>; 1620 1621 def XPACD : ClearAuth<1, "xpacd">; 1622 def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>; 1623 def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>; 1624 1625 def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; 1626 1627 // Combined Instructions 1628 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1629 def BRAA : AuthBranchTwoOperands<0, 0, "braa">; 1630 def BRAB : AuthBranchTwoOperands<0, 1, "brab">; 1631 } 1632 let isCall = 1, Defs = [LR], Uses = [SP] in { 1633 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; 1634 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; 1635 } 1636 1637 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1638 def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; 1639 def BRABZ : AuthOneOperand<0b000, 1, "brabz">; 1640 } 1641 let isCall = 1, Defs = [LR], Uses = [SP] in { 1642 def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; 1643 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; 1644 } 1645 1646 let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1647 def RETAA : AuthReturn<0b010, 0, "retaa">; 1648 def RETAB : AuthReturn<0b010, 1, "retab">; 1649 def ERETAA : AuthReturn<0b100, 0, "eretaa">; 1650 def ERETAB : AuthReturn<0b100, 1, "eretab">; 1651 } 1652 1653 defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; 1654 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; 1655 1656} 1657 1658// v9.5-A pointer authentication extensions 1659 1660// Always accept "pacm" as an alias for "hint #39", but don't emit it when 1661// disassembling if we don't have the pauth-lr feature. 
1662let CRm = 0b0100 in { 1663 def PACM : SystemNoOperands<0b111, "hint\t#39">; 1664} 1665def : InstAlias<"pacm", (PACM), 0>; 1666 1667let Predicates = [HasPAuthLR] in { 1668 let Defs = [LR], Uses = [LR, SP] in { 1669 // opcode2, opcode, asm 1670 def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">; 1671 def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">; 1672 def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">; 1673 def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">; 1674 // opc, asm 1675 def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">; 1676 def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">; 1677 // opcode2, opcode, asm 1678 def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">; 1679 def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">; 1680 // opcode2, opcode, asm 1681 def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">; 1682 def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">; 1683 def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">; 1684 def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">; 1685 } 1686 1687 let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1688 // opc, op2, asm 1689 def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">; 1690 def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">; 1691 // op3, asm 1692 def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">; 1693 def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">; 1694 } 1695 def : InstAlias<"pacm", (PACM), 1>; 1696} 1697 1698 1699// v8.3a floating point conversion for javascript 1700let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in 1701def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, 1702 "fjcvtzs", 1703 [(set GPR32:$Rd, 1704 (int_aarch64_fjcvtzs FPR64:$Rn))]> { 1705 let Inst{31} = 0; 1706} // HasJS, HasFPARMv8 1707 1708// v8.4 Flag manipulation instructions 1709let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { 1710def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { 1711 let Inst{20-5} = 0b0000001000000000; 1712} 1713def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; 1714def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; 1715def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", 1716 "{\t$Rn, $imm, $mask}">; 1717} // HasFlagM 1718 1719// v8.5 flag manipulation instructions 1720let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { 1721 1722def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { 1723 let Inst{18-16} = 0b000; 1724 let Inst{11-8} = 0b0000; 1725 let Unpredictable{11-8} = 0b1111; 1726 let Inst{7-5} = 0b001; 1727} 1728 1729def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { 1730 let Inst{18-16} = 0b000; 1731 let Inst{11-8} = 0b0000; 1732 let Unpredictable{11-8} = 0b1111; 1733 let Inst{7-5} = 0b010; 1734} 1735} // HasAltNZCV 1736 1737 1738// Armv8.5-A speculation barrier 1739def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { 1740 let Inst{20-5} = 0b0001100110000111; 1741 let Unpredictable{11-8} = 0b1111; 1742 let Predicates = [HasSB]; 1743 let hasSideEffects = 1; 1744} 1745 1746def : InstAlias<"clrex", (CLREX 0xf)>; 1747def : InstAlias<"isb", (ISB 0xf)>; 1748def : InstAlias<"ssbb", (DSB 0)>; 1749def : InstAlias<"pssbb", (DSB 4)>; 1750def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; 1751 1752def MRS : MRSI; 
1753def MSR : MSRI; 1754def MSRpstateImm1 : MSRpstateImm0_1; 1755def MSRpstateImm4 : MSRpstateImm0_15; 1756 1757def : Pat<(AArch64mrs imm:$id), 1758 (MRS imm:$id)>; 1759 1760// The thread pointer (on Linux, at least, where this has been implemented) is 1761// TPIDR_EL0. 1762def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), 1763 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; 1764 1765// This gets lowered into a 24-byte instruction sequence 1766let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { 1767def KCFI_CHECK : Pseudo< 1768 (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>; 1769} 1770 1771let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { 1772def HWASAN_CHECK_MEMACCESS : Pseudo< 1773 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1774 [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1775 Sched<[]>; 1776} 1777 1778let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { 1779def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< 1780 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1781 [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1782 Sched<[]>; 1783} 1784 1785// The virtual cycle counter register is CNTVCT_EL0. 1786def : Pat<(readcyclecounter), (MRS 0xdf02)>; 1787 1788// FPCR register 1789let Uses = [FPCR] in 1790def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), 1791 [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, 1792 PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, 1793 Sched<[WriteSys]>; 1794let Defs = [FPCR] in 1795def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), 1796 [(int_aarch64_set_fpcr i64:$val)]>, 1797 PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, 1798 Sched<[WriteSys]>; 1799 1800// Generic system instructions 1801def SYSxt : SystemXtI<0, "sys">; 1802def SYSLxt : SystemLXtI<1, "sysl">; 1803 1804def : InstAlias<"sys $op1, $Cn, $Cm, $op2", 1805 (SYSxt imm0_7:$op1, sys_cr_op:$Cn, 1806 sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 1807 1808 1809let Predicates = [HasTME] in { 1810 1811def TSTART : TMSystemI<0b0000, "tstart", 1812 [(set GPR64:$Rt, (int_aarch64_tstart))]>; 1813 1814def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; 1815 1816def TCANCEL : TMSystemException<0b011, "tcancel", 1817 [(int_aarch64_tcancel timm64_0_65535:$imm)]>; 1818 1819def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { 1820 let mayLoad = 0; 1821 let mayStore = 0; 1822} 1823} // HasTME 1824 1825//===----------------------------------------------------------------------===// 1826// Move immediate instructions. 1827//===----------------------------------------------------------------------===// 1828 1829defm MOVK : InsertImmediate<0b11, "movk">; 1830defm MOVN : MoveImmediate<0b00, "movn">; 1831 1832let PostEncoderMethod = "fixMOVZ" in 1833defm MOVZ : MoveImmediate<0b10, "movz">; 1834 1835// First group of aliases covers an implicit "lsl #0". 1836def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; 1837def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; 1838def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1839def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1840def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1841def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1842 1843// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
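// For example (a sketch): writing
//   movz x0, #:abs_g1:sym
// matches the corresponding g1 alias below, so the parser selects MOVZXi with
// shift 16, and the :abs_g1: modifier becomes the ELF relocation
// (R_AARCH64_MOVW_UABS_G1) that fills in bits [31:16] of sym's address.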
1844def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1845def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1846def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1847def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1848 1849def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1850def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1851def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1852def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1853 1854def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; 1855def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; 1856def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; 1857def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; 1858 1859def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1860def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1861 1862def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1863def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1864 1865def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; 1866def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; 1867 1868// Final group of aliases covers true "mov $Rd, $imm" cases. 1869multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, 1870 int width, int shift> { 1871 def _asmoperand : AsmOperandClass { 1872 let Name = basename # width # "_lsl" # shift # "MovAlias"; 1873 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " 1874 # shift # ">"; 1875 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; 1876 } 1877 1878 def _movimm : Operand<i32> { 1879 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); 1880 } 1881 1882 def : InstAlias<"mov $Rd, $imm", 1883 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; 1884} 1885 1886defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; 1887defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; 1888 1889defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; 1890defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; 1891defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; 1892defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; 1893 1894defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; 1895defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; 1896 1897defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; 1898defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; 1899defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; 1900defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; 1901 1902let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, 1903 isAsCheapAsAMove = 1 in { 1904// FIXME: The following pseudo instructions are only needed because remat 1905// cannot handle multiple instructions. When that changes, we can select 1906// directly to the real instructions and get rid of these pseudos. 
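// (A sketch of why the pseudos below exist: MOVi64imm with, say,
// 0x0000123400005678 is only expanded later into a movz/movk chain, roughly
//   movz x0, #0x5678
//   movk x0, #0x1234, lsl #32
// and rematerialization cannot currently recreate such a multi-instruction
// sequence from its parts.)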
1907 1908def MOVi32imm 1909 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), 1910 [(set GPR32:$dst, imm:$src)]>, 1911 Sched<[WriteImm]>; 1912def MOVi64imm 1913 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), 1914 [(set GPR64:$dst, imm:$src)]>, 1915 Sched<[WriteImm]>; 1916} // isReMaterializable, isCodeGenOnly 1917 1918// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the 1919// eventual expansion code fewer bits to worry about getting right. Marshalling 1920// the types is a little tricky though: 1921def i64imm_32bit : ImmLeaf<i64, [{ 1922 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 1923}]>; 1924 1925def s64imm_32bit : ImmLeaf<i64, [{ 1926 int64_t Imm64 = static_cast<int64_t>(Imm); 1927 return Imm64 >= std::numeric_limits<int32_t>::min() && 1928 Imm64 <= std::numeric_limits<int32_t>::max(); 1929}]>; 1930 1931def trunc_imm : SDNodeXForm<imm, [{ 1932 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); 1933}]>; 1934 1935def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, 1936 GISDNodeXFormEquiv<trunc_imm>; 1937 1938let Predicates = [OptimizedGISelOrOtherSelector] in { 1939// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless 1940// copies. 1941def : Pat<(i64 i64imm_32bit:$src), 1942 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; 1943} 1944 1945// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 1946def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 1947return CurDAG->getTargetConstant( 1948 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 1949}]>; 1950 1951def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 1952return CurDAG->getTargetConstant( 1953 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 1954}]>; 1955 1956 1957def : Pat<(f32 fpimm:$in), 1958 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; 1959def : Pat<(f64 fpimm:$in), 1960 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; 1961 1962 1963// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 1964// sequences. 1965def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 1966 tglobaladdr:$g1, tglobaladdr:$g0), 1967 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), 1968 tglobaladdr:$g1, 16), 1969 tglobaladdr:$g2, 32), 1970 tglobaladdr:$g3, 48)>; 1971 1972def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 1973 tblockaddress:$g1, tblockaddress:$g0), 1974 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), 1975 tblockaddress:$g1, 16), 1976 tblockaddress:$g2, 32), 1977 tblockaddress:$g3, 48)>; 1978 1979def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 1980 tconstpool:$g1, tconstpool:$g0), 1981 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), 1982 tconstpool:$g1, 16), 1983 tconstpool:$g2, 32), 1984 tconstpool:$g3, 48)>; 1985 1986def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 1987 tjumptable:$g1, tjumptable:$g0), 1988 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), 1989 tjumptable:$g1, 16), 1990 tjumptable:$g2, 32), 1991 tjumptable:$g3, 48)>; 1992 1993 1994//===----------------------------------------------------------------------===// 1995// Arithmetic instructions. 1996//===----------------------------------------------------------------------===// 1997 1998// Add/subtract with carry. 
1999defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 2000defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 2001 2002def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 2003def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 2004def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 2005def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 2006 2007// Add/subtract 2008defm ADD : AddSub<0, "add", "sub", add>; 2009defm SUB : AddSub<1, "sub", "add">; 2010 2011def : InstAlias<"mov $dst, $src", 2012 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 2013def : InstAlias<"mov $dst, $src", 2014 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 2015def : InstAlias<"mov $dst, $src", 2016 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 2017def : InstAlias<"mov $dst, $src", 2018 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 2019 2020defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; 2021defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; 2022 2023def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ 2024 return N->getOpcode() == ISD::CopyFromReg && 2025 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; 2026}]>; 2027 2028// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 2029def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 2030 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 2031def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 2032 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 2033def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 2034 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 2035def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 2036 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 2037def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 2038 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 2039def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 2040 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 2041let AddedComplexity = 1 in { 2042def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), 2043 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; 2044def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), 2045 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; 2046def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), 2047 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; 2048} 2049 2050// Because of the immediate format for add/sub-imm instructions, the 2051// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 2052// These patterns capture that transformation. 2053let AddedComplexity = 1 in { 2054def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2055 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2056def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2057 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2058def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2059 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2060def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2061 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2062} 2063 2064// Because of the immediate format for add/sub-imm instructions, the 2065// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 2066// These patterns capture that transformation. 
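// (For example: the add/sub immediate field is an unsigned 12-bit value, so
// "adds w0, w0, #-3" is not encodable directly; with the patterns below the
// flag-setting (AArch64add_flag x, -3) is selected as "subs w0, w0, #3".)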
2067let AddedComplexity = 1 in { 2068def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2069 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2070def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2071 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2072def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2073 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2074def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2075 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2076} 2077 2078def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 2079def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 2080def : InstAlias<"neg $dst, $src$shift", 2081 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 2082def : InstAlias<"neg $dst, $src$shift", 2083 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 2084 2085def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 2086def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 2087def : InstAlias<"negs $dst, $src$shift", 2088 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 2089def : InstAlias<"negs $dst, $src$shift", 2090 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 2091 2092 2093// Unsigned/Signed divide 2094defm UDIV : Div<0, "udiv", udiv>; 2095defm SDIV : Div<1, "sdiv", sdiv>; 2096 2097def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; 2098def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; 2099def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; 2100def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; 2101 2102// Variable shift 2103defm ASRV : Shift<0b10, "asr", sra>; 2104defm LSLV : Shift<0b00, "lsl", shl>; 2105defm LSRV : Shift<0b01, "lsr", srl>; 2106defm RORV : Shift<0b11, "ror", rotr>; 2107 2108def : ShiftAlias<"asrv", ASRVWr, GPR32>; 2109def : ShiftAlias<"asrv", ASRVXr, GPR64>; 2110def : ShiftAlias<"lslv", LSLVWr, GPR32>; 2111def : ShiftAlias<"lslv", LSLVXr, GPR64>; 2112def : ShiftAlias<"lsrv", LSRVWr, GPR32>; 2113def : ShiftAlias<"lsrv", LSRVXr, GPR64>; 2114def : ShiftAlias<"rorv", RORVWr, GPR32>; 2115def : ShiftAlias<"rorv", RORVXr, GPR64>; 2116 2117// Multiply-add 2118let AddedComplexity = 5 in { 2119defm MADD : MulAccum<0, "madd">; 2120defm MSUB : MulAccum<1, "msub">; 2121 2122def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), 2123 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 2124def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), 2125 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 2126 2127def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), 2128 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 2129def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), 2130 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 2131def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), 2132 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 2133def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), 2134 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 2135} // AddedComplexity = 5 2136 2137let AddedComplexity = 5 in { 2138def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; 2139def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; 2140def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; 2141def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; 2142 2143def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), 2144 (SMADDLrrr 
(EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2145def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), 2146 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2147def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), 2148 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2149def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), 2150 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2151def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), 2152 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2153def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), 2154 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2155 2156def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), 2157 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2158def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), 2159 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2160 2161def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), 2162 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2163def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), 2164 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2165def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), 2166 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2167 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2168 2169def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2170 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2171def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2172 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2173def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), 2174 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2175 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2176 2177def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), 2178 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2179def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), 2180 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2181def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), 2182 GPR64:$Ra)), 2183 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2184 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2185 2186def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2187 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2188def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2189 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2190def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), 2191 (s64imm_32bit:$C)))), 2192 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2193 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2194 2195def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)), 2196 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2197def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))), 2198 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2199 2200def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)), 2201 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2202def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)), 2203 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2204 2205def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2206 
(SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2207def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2208 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2209 2210def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2211 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2212def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2213 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2214 2215def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), 2216 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2217def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), 2218 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2219 2220def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), 2221 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2222def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), 2223 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2224 2225def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), 2226 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2227def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2228 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2229 2230def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), 2231 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2232def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2233 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2234} // AddedComplexity = 5 2235 2236def : MulAccumWAlias<"mul", MADDWrrr>; 2237def : MulAccumXAlias<"mul", MADDXrrr>; 2238def : MulAccumWAlias<"mneg", MSUBWrrr>; 2239def : MulAccumXAlias<"mneg", MSUBXrrr>; 2240def : WideMulAccumAlias<"smull", SMADDLrrr>; 2241def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 2242def : WideMulAccumAlias<"umull", UMADDLrrr>; 2243def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 2244 2245// Multiply-high 2246def SMULHrr : MulHi<0b010, "smulh", mulhs>; 2247def UMULHrr : MulHi<0b110, "umulh", mulhu>; 2248 2249// CRC32 2250def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 2251def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 2252def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 2253def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 2254 2255def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 2256def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 2257def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 2258def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; 2259 2260// v8.1 atomic CAS 2261defm CAS : CompareAndSwap<0, 0, "">; 2262defm CASA : CompareAndSwap<1, 0, "a">; 2263defm CASL : CompareAndSwap<0, 1, "l">; 2264defm CASAL : CompareAndSwap<1, 1, "al">; 2265 2266// v8.1 atomic CASP 2267defm CASP : CompareAndSwapPair<0, 0, "">; 2268defm CASPA : CompareAndSwapPair<1, 0, "a">; 2269defm CASPL : CompareAndSwapPair<0, 1, "l">; 2270defm CASPAL : CompareAndSwapPair<1, 1, "al">; 2271 2272// v8.1 atomic SWP 2273defm SWP : Swap<0, 0, "">; 2274defm SWPA : Swap<1, 0, "a">; 2275defm SWPL : Swap<0, 1, "l">; 2276defm SWPAL : Swap<1, 1, "al">; 2277 2278// v8.1 atomic LD<OP>(register). 
Performs load and then ST<OP>(register) 2279defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; 2280defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; 2281defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; 2282defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; 2283 2284defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; 2285defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; 2286defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; 2287defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; 2288 2289defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; 2290defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; 2291defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; 2292defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; 2293 2294defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; 2295defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; 2296defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; 2297defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; 2298 2299defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; 2300defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; 2301defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; 2302defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; 2303 2304defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; 2305defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; 2306defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; 2307defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; 2308 2309defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; 2310defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; 2311defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; 2312defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; 2313 2314defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; 2315defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; 2316defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; 2317defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; 2318 2319// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" 2320defm : STOPregister<"stadd","LDADD">; // STADDx 2321defm : STOPregister<"stclr","LDCLR">; // STCLRx 2322defm : STOPregister<"steor","LDEOR">; // STEORx 2323defm : STOPregister<"stset","LDSET">; // STSETx 2324defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx 2325defm : STOPregister<"stsmin","LDSMIN">;// STSMINx 2326defm : STOPregister<"stumax","LDUMAX">;// STUMAXx 2327defm : STOPregister<"stumin","LDUMIN">;// STUMINx 2328 2329// v8.5 Memory Tagging Extension 2330let Predicates = [HasMTE] in { 2331 2332def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", 2333 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; 2334 2335def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", 2336 int_aarch64_gmi, GPR64sp>, Sched<[]> { 2337 let isNotDuplicable = 1; 2338} 2339def ADDG : AddSubG<0, "addg", null_frag>; 2340def SUBG : AddSubG<1, "subg", null_frag>; 2341 2342def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; 2343 2344def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; 2345def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ 2346 let Defs = [NZCV]; 2347} 2348 2349def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; 2350 2351def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; 2352 2353def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), 2354 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; 2355def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2356 (LDG GPR64:$Rt, 
GPR64sp:$Rn, simm9s16:$offset)>; 2357 2358def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; 2359 2360def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", 2361 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; 2362def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", 2363 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; 2364def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", 2365 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { 2366 let Inst{23} = 0; 2367} 2368 2369defm STG : MemTagStore<0b00, "stg">; 2370defm STZG : MemTagStore<0b01, "stzg">; 2371defm ST2G : MemTagStore<0b10, "st2g">; 2372defm STZ2G : MemTagStore<0b11, "stz2g">; 2373 2374def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2375 (STGi $Rn, $Rm, $imm)>; 2376def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2377 (STZGi $Rn, $Rm, $imm)>; 2378def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2379 (ST2Gi $Rn, $Rm, $imm)>; 2380def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2381 (STZ2Gi $Rn, $Rm, $imm)>; 2382 2383defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; 2384def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; 2385def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; 2386 2387def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2388 (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2389 2390def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), 2391 (STGPi $Rt, $Rt2, $Rn, $imm)>; 2392 2393def IRGstack 2394 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, 2395 Sched<[]>; 2396def TAGPstack 2397 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, 2398 Sched<[]>; 2399 2400// Explicit SP in the first operand prevents ShrinkWrap optimization 2401// from leaving this instruction out of the stack frame. When IRGstack 2402// is transformed into IRG, this operand is replaced with the actual 2403// register / expression for the tagged base pointer of the current function. 2404def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; 2405 2406// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. 2407// $Rn_wback is one past the end of the range. $Rm is the loop counter. 2408let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in { 2409def STGloop_wback 2410 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2411 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2412 Sched<[WriteAdr, WriteST]>; 2413 2414def STZGloop_wback 2415 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2416 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2417 Sched<[WriteAdr, WriteST]>; 2418 2419// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. 2420// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). 
2421def STGloop 2422 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2423 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2424 Sched<[WriteAdr, WriteST]>; 2425 2426def STZGloop 2427 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2428 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2429 Sched<[WriteAdr, WriteST]>; 2430} 2431 2432} // Predicates = [HasMTE] 2433 2434//===----------------------------------------------------------------------===// 2435// Logical instructions. 2436//===----------------------------------------------------------------------===// 2437 2438// (immediate) 2439defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; 2440defm AND : LogicalImm<0b00, "and", and, "bic">; 2441defm EOR : LogicalImm<0b10, "eor", xor, "eon">; 2442defm ORR : LogicalImm<0b01, "orr", or, "orn">; 2443 2444// FIXME: these aliases *are* canonical sometimes (when movz can't be 2445// used). Actually, it seems to be working right now, but putting logical_immXX 2446// here is a bit dodgy on the AsmParser side too. 2447def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, 2448 logical_imm32:$imm), 0>; 2449def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, 2450 logical_imm64:$imm), 0>; 2451 2452 2453// (register) 2454defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; 2455defm BICS : LogicalRegS<0b11, 1, "bics", 2456 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; 2457defm AND : LogicalReg<0b00, 0, "and", and>; 2458defm BIC : LogicalReg<0b00, 1, "bic", 2459 BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>; 2460defm EON : LogicalReg<0b10, 1, "eon", 2461 BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; 2462defm EOR : LogicalReg<0b10, 0, "eor", xor>; 2463defm ORN : LogicalReg<0b01, 1, "orn", 2464 BinOpFrag<(or node:$LHS, (not node:$RHS))>>; 2465defm ORR : LogicalReg<0b01, 0, "orr", or>; 2466 2467def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; 2468def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; 2469 2470def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; 2471def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; 2472 2473def : InstAlias<"mvn $Wd, $Wm$sh", 2474 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; 2475def : InstAlias<"mvn $Xd, $Xm$sh", 2476 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; 2477 2478def : InstAlias<"tst $src1, $src2", 2479 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; 2480def : InstAlias<"tst $src1, $src2", 2481 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; 2482 2483def : InstAlias<"tst $src1, $src2", 2484 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; 2485def : InstAlias<"tst $src1, $src2", 2486 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; 2487 2488def : InstAlias<"tst $src1, $src2$sh", 2489 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; 2490def : InstAlias<"tst $src1, $src2$sh", 2491 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; 2492 2493 2494def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; 2495def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; 2496 2497 2498//===----------------------------------------------------------------------===// 2499// One operand data processing instructions. 
//===----------------------------------------------------------------------===//

defm CLS    : OneOperandData<0b000101, "cls">;
defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;

def  REV16Wr : OneWRegData<0b000001, "rev16",
                           UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                (i64 1))),
          (CLSXr GPR64:$Rn)>;
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;

// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit; they actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b000010, "rev", bswap>;
def REVXr   : OneXRegData<0b000011, "rev", bswap>;
def REV32Xr : OneXRegData<0b000010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
          (REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
2563//===----------------------------------------------------------------------===// 2564let hasSideEffects = 0 in { 2565defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; 2566defm SBFM : BitfieldImm<0b00, "sbfm">; 2567defm UBFM : BitfieldImm<0b10, "ubfm">; 2568} 2569 2570def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2571 uint64_t enc = (32 - N->getZExtValue()) & 0x1f; 2572 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2573}]>; 2574 2575def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2576 uint64_t enc = 31 - N->getZExtValue(); 2577 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2578}]>; 2579 2580// min(7, 31 - shift_amt) 2581def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2582 uint64_t enc = 31 - N->getZExtValue(); 2583 enc = enc > 7 ? 7 : enc; 2584 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2585}]>; 2586 2587// min(15, 31 - shift_amt) 2588def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2589 uint64_t enc = 31 - N->getZExtValue(); 2590 enc = enc > 15 ? 15 : enc; 2591 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2592}]>; 2593 2594def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2595 uint64_t enc = (64 - N->getZExtValue()) & 0x3f; 2596 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2597}]>; 2598 2599def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2600 uint64_t enc = 63 - N->getZExtValue(); 2601 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2602}]>; 2603 2604// min(7, 63 - shift_amt) 2605def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2606 uint64_t enc = 63 - N->getZExtValue(); 2607 enc = enc > 7 ? 7 : enc; 2608 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2609}]>; 2610 2611// min(15, 63 - shift_amt) 2612def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2613 uint64_t enc = 63 - N->getZExtValue(); 2614 enc = enc > 15 ? 15 : enc; 2615 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2616}]>; 2617 2618// min(31, 63 - shift_amt) 2619def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ 2620 uint64_t enc = 63 - N->getZExtValue(); 2621 enc = enc > 31 ? 
31 : enc; 2622 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2623}]>; 2624 2625def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), 2626 (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 2627 (i64 (i32shift_b imm0_31:$imm)))>; 2628def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), 2629 (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 2630 (i64 (i64shift_b imm0_63:$imm)))>; 2631 2632let AddedComplexity = 10 in { 2633def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), 2634 (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2635def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), 2636 (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2637} 2638 2639def : InstAlias<"asr $dst, $src, $shift", 2640 (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2641def : InstAlias<"asr $dst, $src, $shift", 2642 (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2643def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2644def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2645def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2646def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2647def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2648 2649def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), 2650 (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2651def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), 2652 (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2653 2654def : InstAlias<"lsr $dst, $src, $shift", 2655 (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2656def : InstAlias<"lsr $dst, $src, $shift", 2657 (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2658def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2659def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2660def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2661def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2662def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2663 2664//===----------------------------------------------------------------------===// 2665// Conditional comparison instructions. 2666//===----------------------------------------------------------------------===// 2667defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; 2668defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; 2669 2670//===----------------------------------------------------------------------===// 2671// Conditional select instructions. 
2672//===----------------------------------------------------------------------===// 2673defm CSEL : CondSelect<0, 0b00, "csel">; 2674 2675def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; 2676defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; 2677defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; 2678defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; 2679 2680def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2681 (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2682def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2683 (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2684def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2685 (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2686def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2687 (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2688def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2689 (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2690def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2691 (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2692 2693def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), 2694 (CSINCWr WZR, WZR, (i32 imm:$cc))>; 2695def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), 2696 (CSINCXr XZR, XZR, (i32 imm:$cc))>; 2697def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), 2698 (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2699def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), 2700 (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2701def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), 2702 (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2703def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), 2704 (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2705def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), 2706 (CSINVWr WZR, WZR, (i32 imm:$cc))>; 2707def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), 2708 (CSINVXr XZR, XZR, (i32 imm:$cc))>; 2709def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), 2710 (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2711def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), 2712 (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2713def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), 2714 (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2715def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), 2716 (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2717 2718def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2719 (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>; 2720def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2721 (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; 2722 2723def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2724 (CSINCWr GPR32:$val, WZR, imm:$cc)>; 2725def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), 2726 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2727def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2728 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2729 2730def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2731 (CSELWr WZR, GPR32:$val, imm:$cc)>; 2732def : Pat<(and (topbitsallzero64:$val), (AArch64csel 
                            (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;

// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel,
                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
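// For example, a bare "ret" assembles identically to "ret x30"; the alias
// below supplies LR when no operand is given.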
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
  let Uses = [X16, SP] in
  def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR X16)>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call_bti GPR64noip:$Rn),
          (BLR_BTI GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag LR as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
2877//===----------------------------------------------------------------------===// 2878def Bcc : BranchCond<0, "b">; 2879 2880// Armv8.8-A variant form which hints to the branch predictor that 2881// this branch is very likely to go the same way nearly all the time 2882// (even though it is not known at compile time _which_ way that is). 2883def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; 2884 2885//===----------------------------------------------------------------------===// 2886// Compare-and-branch instructions. 2887//===----------------------------------------------------------------------===// 2888defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; 2889defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; 2890 2891//===----------------------------------------------------------------------===// 2892// Test-bit-and-branch instructions. 2893//===----------------------------------------------------------------------===// 2894defm TBZ : TestBranch<0, "tbz", AArch64tbz>; 2895defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; 2896 2897//===----------------------------------------------------------------------===// 2898// Unconditional branch (immediate) instructions. 2899//===----------------------------------------------------------------------===// 2900let isBranch = 1, isTerminator = 1, isBarrier = 1 in { 2901def B : BranchImm<0, "b", [(br bb:$addr)]>; 2902} // isBranch, isTerminator, isBarrier 2903 2904let isCall = 1, Defs = [LR], Uses = [SP] in { 2905def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; 2906} // isCall 2907def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; 2908 2909//===----------------------------------------------------------------------===// 2910// Exception generation instructions. 2911//===----------------------------------------------------------------------===// 2912let isTrap = 1 in { 2913def BRK : ExceptionGeneration<0b001, 0b00, "brk", 2914 [(int_aarch64_break timm32_0_65535:$imm)]>; 2915} 2916def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; 2917def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; 2918def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>; 2919def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; 2920def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; 2921def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>; 2922def SVC : ExceptionGeneration<0b000, 0b01, "svc">; 2923 2924// DCPSn defaults to an immediate operand of zero if unspecified. 2925def : InstAlias<"dcps1", (DCPS1 0)>; 2926def : InstAlias<"dcps2", (DCPS2 0)>; 2927def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>; 2928 2929def UDF : UDFType<0, "udf">; 2930 2931//===----------------------------------------------------------------------===// 2932// Load instructions. 
2933//===----------------------------------------------------------------------===// 2934 2935// Pair (indexed, offset) 2936defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; 2937defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; 2938let Predicates = [HasFPARMv8] in { 2939defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">; 2940defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">; 2941defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">; 2942} 2943 2944defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2945 2946// Pair (pre-indexed) 2947def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">; 2948def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">; 2949let Predicates = [HasFPARMv8] in { 2950def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; 2951def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; 2952def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; 2953} 2954 2955def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2956 2957// Pair (post-indexed) 2958def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">; 2959def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">; 2960let Predicates = [HasFPARMv8] in { 2961def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; 2962def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; 2963def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; 2964} 2965 2966def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2967 2968 2969// Pair (no allocate) 2970defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">; 2971defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">; 2972let Predicates = [HasFPARMv8] in { 2973defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">; 2974defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">; 2975defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">; 2976} 2977 2978def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), 2979 (LDPXi GPR64sp:$Rn, simm7s8:$offset)>; 2980 2981def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)), 2982 (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>; 2983//--- 2984// (register offset) 2985//--- 2986 2987// Integer 2988defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; 2989defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; 2990defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; 2991defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; 2992 2993// Floating-point 2994let Predicates = [HasFPARMv8] in { 2995defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>; 2996defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>; 2997defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>; 2998defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>; 2999defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>; 3000} 3001 3002// Load sign-extended half-word 3003defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; 3004defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; 3005 3006// Load sign-extended byte 3007defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; 3008defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; 3009 3010// Load sign-extended word 3011defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; 3012 3013// Pre-fetch. 
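// For example (register-offset form): "prfm pldl1keep, [x0, x1, lsl #3]".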
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;


def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all 64-bit-wide loads whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
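  // (A 64-bit LDR into a D register yields a different in-register element
  // order than LD1 on big-endian targets, so these patterns are LE-only.)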
  defm : VecROLoadPat<ro64, v2i32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,   LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}


// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8,  zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                   [(set (f128 FPR128Op:$Rt),
                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
}

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
3297def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3298 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 3299def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 3300 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 3301def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3302 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 3303def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3304 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3305def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3306 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3307 3308// load sign-extended half-word 3309defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", 3310 [(set GPR32:$Rt, 3311 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 3312 uimm12s2:$offset)))]>; 3313defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", 3314 [(set GPR64:$Rt, 3315 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 3316 uimm12s2:$offset)))]>; 3317 3318// load sign-extended byte 3319defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", 3320 [(set GPR32:$Rt, 3321 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 3322 uimm12s1:$offset)))]>; 3323defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", 3324 [(set GPR64:$Rt, 3325 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 3326 uimm12s1:$offset)))]>; 3327 3328// load sign-extended word 3329defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", 3330 [(set GPR64:$Rt, 3331 (sextloadi32 (am_indexed32 GPR64sp:$Rn, 3332 uimm12s4:$offset)))]>; 3333 3334// load zero-extended word 3335def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 3336 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 3337 3338// Pre-fetch. 
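// The prfop operand packs <type><target><policy>, e.g.
// "prfm pldl1keep, [x0, #8]" (prefetch for load, L1, temporal) or
// "prfm pstl2strm, [x0]" (prefetch for store, L2, streaming).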
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                          (am_indexed64 GPR64sp:$Rn,
                                                        uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
                        [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
                        [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
let Predicates = [HasFPARMv8] in {
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
                        [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
                        [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
                        [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
}

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
                         [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                          [(set GPR64z:$Rt,
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                          [(set GPR32z:$Rt,
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                          [(set FPR8Op:$Rt,
                                (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                          [(set (f16 FPR16Op:$Rt),
                                (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                          [(set (f32 FPR32Op:$Rt),
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                          [(set (f64 FPR64Op:$Rt),
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                          [(set (f128 FPR128Op:$Rt),
                                (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
}

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
                   [(set GPR32:$Rt,
                         (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
                   [(set GPR32:$Rt,
                         (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;


//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                   [(set GPR32:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                   [(set GPR64:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                   [(set GPR64:$Rt,
                         (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// zero- and sign-extending aliases from generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// An LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
// load, 0) can use a single load.
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  // Scaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // Half-vector patterns
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // SVE patterns
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}

defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
                              am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch timm:$Rt,
                                    (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

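// Pre- and post-indexed forms update the base register, e.g.
//   ldr x0, [x1, #8]!   (pre-index:  x1 += 8, then load)
//   ldr x0, [x1], #8    (post-index: load, then x1 += 8)
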
//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
let Predicates = [HasFPARMv8] in {
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
}

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;


//---
// (register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;


// Floating-point
let Predicates = [HasFPARMv8] in {
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op,   "str", i8,  store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op,  "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op,  "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op,  "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
}

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

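// A truncating store of an i64 writes only the low bits of the register, so
// the patterns below first take the W sub-register; e.g. a truncstorei8 of
// x0 becomes "strb w0, [x1, x2]".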
3798multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop, 3799 Instruction STRW, Instruction STRX> { 3800 3801 def : Pat<(storeop GPR64:$Rt, 3802 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 3803 (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32), 3804 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 3805 3806 def : Pat<(storeop GPR64:$Rt, 3807 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 3808 (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), 3809 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 3810} 3811 3812let AddedComplexity = 10 in { 3813 // truncstore i64 3814 defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>; 3815 defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>; 3816 defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>; 3817} 3818 3819multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR, 3820 Instruction STRW, Instruction STRX> { 3821 def : Pat<(store (VecTy FPR:$Rt), 3822 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 3823 (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 3824 3825 def : Pat<(store (VecTy FPR:$Rt), 3826 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 3827 (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 3828} 3829 3830let AddedComplexity = 10 in { 3831// Match all store 64 bits width whose type is compatible with FPR64 3832let Predicates = [IsLE] in { 3833 // We must use ST1 to store vectors in big-endian. 3834 defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>; 3835 defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>; 3836 defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>; 3837 defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>; 3838 defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>; 3839 defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>; 3840} 3841 3842defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>; 3843defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>; 3844 3845// Match all store 128 bits width whose type is compatible with FPR128 3846let Predicates = [IsLE, UseSTRQro] in { 3847 // We must use ST1 to store vectors in big-endian. 3848 defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>; 3849 defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>; 3850 defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>; 3851 defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>; 3852 defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>; 3853 defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>; 3854 defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>; 3855 defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>; 3856} 3857} // AddedComplexity = 10 3858 3859// Match stores from lane 0 to the appropriate subreg's store. 
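// That is, (store (vector_extract V, 0)) becomes a plain scalar FP store of
// the low subregister; e.g. storing lane 0 of a v4f32 held in q0 can be a
// simple "str s0, [x0, x1, lsl #2]" rather than an ST1 lane store
// (illustrative only; the instruction chosen depends on the matched
// addressing mode).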
3860multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop, 3861 ValueType VecTy, ValueType STy, 3862 ValueType SubRegTy, 3863 SubRegIndex SubRegIdx, 3864 Instruction STRW, Instruction STRX> { 3865 3866 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))), 3867 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 3868 (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)), 3869 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 3870 3871 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))), 3872 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 3873 (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)), 3874 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 3875} 3876 3877let AddedComplexity = 19 in { 3878 defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>; 3879 defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, f16, hsub, STRHroW, STRHroX>; 3880 defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, i32, ssub, STRSroW, STRSroX>; 3881 defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, i32, ssub, STRSroW, STRSroX>; 3882 defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, i64, dsub, STRDroW, STRDroX>; 3883 defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, i64, dsub, STRDroW, STRDroX>; 3884} 3885 3886//--- 3887// (unsigned immediate) 3888defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str", 3889 [(store GPR64z:$Rt, 3890 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 3891defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str", 3892 [(store GPR32z:$Rt, 3893 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 3894let Predicates = [HasFPARMv8] in { 3895defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str", 3896 [(store FPR8Op:$Rt, 3897 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; 3898defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str", 3899 [(store (f16 FPR16Op:$Rt), 3900 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; 3901defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str", 3902 [(store (f32 FPR32Op:$Rt), 3903 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 3904defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str", 3905 [(store (f64 FPR64Op:$Rt), 3906 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 3907defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>; 3908} 3909 3910defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh", 3911 [(truncstorei16 GPR32z:$Rt, 3912 (am_indexed16 GPR64sp:$Rn, 3913 uimm12s2:$offset))]>; 3914defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb", 3915 [(truncstorei8 GPR32z:$Rt, 3916 (am_indexed8 GPR64sp:$Rn, 3917 uimm12s1:$offset))]>; 3918 3919// bf16 store pattern 3920def : Pat<(store (bf16 FPR16Op:$Rt), 3921 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 3922 (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>; 3923 3924let AddedComplexity = 10 in { 3925 3926// Match all store 64 bits width whose type is compatible with FPR64 3927def : Pat<(store (v1i64 FPR64:$Rt), 3928 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3929 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3930def : Pat<(store (v1f64 FPR64:$Rt), 3931 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3932 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3933 3934let Predicates = [IsLE] in { 3935 // We must use ST1 to store vectors in big-endian. 
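  // (Big-endian note: a plain STR treats the D register as a single 64-bit
  // quantity, which reverses the lanes relative to LLVM's element numbering
  // on big-endian targets, while ST1 stores element by element.)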
3936 def : Pat<(store (v2f32 FPR64:$Rt), 3937 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3938 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3939 def : Pat<(store (v8i8 FPR64:$Rt), 3940 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3941 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3942 def : Pat<(store (v4i16 FPR64:$Rt), 3943 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3944 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3945 def : Pat<(store (v2i32 FPR64:$Rt), 3946 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3947 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3948 def : Pat<(store (v4f16 FPR64:$Rt), 3949 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3950 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3951 def : Pat<(store (v4bf16 FPR64:$Rt), 3952 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3953 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3954} 3955 3956// Match all store 128 bits width whose type is compatible with FPR128 3957def : Pat<(store (f128 FPR128:$Rt), 3958 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3959 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3960 3961let Predicates = [IsLE] in { 3962 // We must use ST1 to store vectors in big-endian. 3963 def : Pat<(store (v4f32 FPR128:$Rt), 3964 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3965 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3966 def : Pat<(store (v2f64 FPR128:$Rt), 3967 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3968 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3969 def : Pat<(store (v16i8 FPR128:$Rt), 3970 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3971 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3972 def : Pat<(store (v8i16 FPR128:$Rt), 3973 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3974 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3975 def : Pat<(store (v4i32 FPR128:$Rt), 3976 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3977 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3978 def : Pat<(store (v2i64 FPR128:$Rt), 3979 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3980 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3981 def : Pat<(store (v8f16 FPR128:$Rt), 3982 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3983 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3984 def : Pat<(store (v8bf16 FPR128:$Rt), 3985 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3986 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3987} 3988 3989// truncstore i64 3990def : Pat<(truncstorei32 GPR64:$Rt, 3991 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), 3992 (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; 3993def : Pat<(truncstorei16 GPR64:$Rt, 3994 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 3995 (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; 3996def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), 3997 (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; 3998 3999} // AddedComplexity = 10 4000 4001// Match stores from lane 0 to the appropriate subreg's store. 
4002multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop, 4003 ValueType VTy, ValueType STy, 4004 ValueType SubRegTy, 4005 SubRegIndex SubRegIdx, Operand IndexType, 4006 Instruction STR> { 4007 def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))), 4008 (UIAddrMode GPR64sp:$Rn, IndexType:$offset)), 4009 (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)), 4010 GPR64sp:$Rn, IndexType:$offset)>; 4011} 4012 4013let AddedComplexity = 19 in { 4014 defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>; 4015 defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>; 4016 defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>; 4017 defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, i32, ssub, uimm12s4, STRSui>; 4018 defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, i64, dsub, uimm12s8, STRDui>; 4019 defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, i64, dsub, uimm12s8, STRDui>; 4020} 4021 4022//--- 4023// (unscaled immediate) 4024defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur", 4025 [(store GPR64z:$Rt, 4026 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 4027defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur", 4028 [(store GPR32z:$Rt, 4029 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 4030let Predicates = [HasFPARMv8] in { 4031defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur", 4032 [(store FPR8Op:$Rt, 4033 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 4034defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur", 4035 [(store (f16 FPR16Op:$Rt), 4036 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 4037defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur", 4038 [(store (f32 FPR32Op:$Rt), 4039 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 4040defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur", 4041 [(store (f64 FPR64Op:$Rt), 4042 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 4043defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur", 4044 [(store (f128 FPR128Op:$Rt), 4045 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; 4046} 4047defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh", 4048 [(truncstorei16 GPR32z:$Rt, 4049 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 4050defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb", 4051 [(truncstorei8 GPR32z:$Rt, 4052 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 4053 4054// bf16 store pattern 4055def : Pat<(store (bf16 FPR16Op:$Rt), 4056 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), 4057 (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4058 4059// Armv8.4 Weaker Release Consistency enhancements 4060// LDAPR & STLR with Immediate Offset instructions 4061let Predicates = [HasRCPC_IMMO] in { 4062defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>; 4063defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>; 4064defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>; 4065defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>; 4066defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>; 4067defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>; 4068defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>; 4069defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>; 4070defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>; 4071defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>; 4072defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>; 
4073defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>; 4074defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>; 4075} 4076 4077// Match all store 64 bits width whose type is compatible with FPR64 4078def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4079 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4080def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4081 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4082 4083let AddedComplexity = 10 in { 4084 4085let Predicates = [IsLE] in { 4086 // We must use ST1 to store vectors in big-endian. 4087 def : Pat<(store (v2f32 FPR64:$Rt), 4088 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4089 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4090 def : Pat<(store (v8i8 FPR64:$Rt), 4091 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4092 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4093 def : Pat<(store (v4i16 FPR64:$Rt), 4094 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4095 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4096 def : Pat<(store (v2i32 FPR64:$Rt), 4097 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4098 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4099 def : Pat<(store (v4f16 FPR64:$Rt), 4100 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4101 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4102 def : Pat<(store (v4bf16 FPR64:$Rt), 4103 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4104 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4105} 4106 4107// Match all store 128 bits width whose type is compatible with FPR128 4108def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 4109 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4110 4111let Predicates = [IsLE] in { 4112 // We must use ST1 to store vectors in big-endian. 
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             ValueType SubRegTy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, i64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
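// For example, "str x0, [x1, #-8]" has no STRXui encoding (its offset must
// be an unsigned multiple of 8), so the aliases below let the assembler fall
// back to the unscaled STURXi form for offsets in the signed 9-bit range.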
4171def : InstAlias<"str $Rt, [$Rn, $offset]", 4172 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 4173def : InstAlias<"str $Rt, [$Rn, $offset]", 4174 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 4175def : InstAlias<"str $Rt, [$Rn, $offset]", 4176 (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 4177def : InstAlias<"str $Rt, [$Rn, $offset]", 4178 (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 4179def : InstAlias<"str $Rt, [$Rn, $offset]", 4180 (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 4181def : InstAlias<"str $Rt, [$Rn, $offset]", 4182 (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 4183def : InstAlias<"str $Rt, [$Rn, $offset]", 4184 (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 4185 4186def : InstAlias<"strb $Rt, [$Rn, $offset]", 4187 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 4188def : InstAlias<"strh $Rt, [$Rn, $offset]", 4189 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 4190 4191//--- 4192// (unscaled immediate, unprivileged) 4193defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; 4194defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; 4195 4196defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; 4197defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; 4198 4199//--- 4200// (immediate pre-indexed) 4201def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; 4202def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; 4203let Predicates = [HasFPARMv8] in { 4204def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>; 4205def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; 4206def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; 4207def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; 4208def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; 4209} 4210 4211def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; 4212def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; 4213 4214// truncstore i64 4215def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4216 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4217 simm9:$off)>; 4218def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4219 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4220 simm9:$off)>; 4221def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4222 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4223 simm9:$off)>; 4224 4225def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4226 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4227def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4228 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4229def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4230 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4231def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4232 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4233def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4234 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4235def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4236 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4237def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4238 (STRDpre FPR64:$Rt, 
GPR64sp:$addr, simm9:$off)>; 4239 4240def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4241 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4242def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4243 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4244def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4245 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4246def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4247 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4248def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4249 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4250def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4251 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4252def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4253 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4254 4255//--- 4256// (immediate post-indexed) 4257def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>; 4258def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>; 4259let Predicates = [HasFPARMv8] in { 4260def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>; 4261def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>; 4262def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>; 4263def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>; 4264def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>; 4265} 4266 4267def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>; 4268def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>; 4269 4270// truncstore i64 4271def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4272 (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4273 simm9:$off)>; 4274def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4275 (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4276 simm9:$off)>; 4277def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4278 (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4279 simm9:$off)>; 4280 4281def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), 4282 (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; 4283 4284def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4285 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4286def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4287 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4288def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4289 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4290def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4291 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4292def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4293 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4294def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4295 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4296def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4297 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4298def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4299 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4300 4301def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4302 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 
4303def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4304 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4305def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4306 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4307def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4308 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4309def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4310 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4311def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4312 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4313def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4314 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4315def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4316 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4317 4318//===----------------------------------------------------------------------===// 4319// Load/store exclusive instructions. 4320//===----------------------------------------------------------------------===// 4321 4322def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; 4323def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; 4324def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; 4325def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; 4326 4327def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; 4328def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; 4329def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; 4330def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; 4331 4332def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; 4333def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; 4334def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; 4335def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; 4336 4337def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; 4338def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; 4339def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; 4340def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; 4341 4342/* 4343Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn 4344of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an 4345alias for the case of immediate #0. This is because new STLR versions (from 4346LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not 4347appropriate anymore (it parses and discards the optional zero). This is not the 4348case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed, 4349and the immediate values are not inside the [] brackets and thus not accepted 4350by GPR64sp0 parser. 
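
For example, both "stlr w0, [x0]" and "stlr w0, [x0, #0]" are accepted and
assemble to the same STLRW encoding: the first parses directly, the second
via the explicit #0 alias below.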
4351*/ 4352def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW GPR32: $Rt, GPR64sp:$Rn)>; 4353def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX GPR64: $Rt, GPR64sp:$Rn)>; 4354def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32: $Rt, GPR64sp:$Rn)>; 4355def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32: $Rt, GPR64sp:$Rn)>; 4356 4357def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; 4358def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; 4359def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; 4360def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; 4361 4362def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; 4363def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; 4364def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; 4365def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; 4366 4367def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; 4368def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; 4369 4370def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; 4371def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; 4372 4373def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; 4374def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; 4375 4376def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; 4377def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; 4378 4379let Predicates = [HasLOR] in { 4380 // v8.1a "Limited Order Region" extension load-acquire instructions 4381 def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; 4382 def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; 4383 def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; 4384 def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; 4385 4386 // v8.1a "Limited Order Region" extension store-release instructions 4387 def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; 4388 def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; 4389 def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; 4390 def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; 4391 4392 // Aliases for when offset=0 4393 def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRW GPR32: $Rt, GPR64sp:$Rn)>; 4394 def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRX GPR64: $Rt, GPR64sp:$Rn)>; 4395 def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32: $Rt, GPR64sp:$Rn)>; 4396 def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32: $Rt, GPR64sp:$Rn)>; 4397} 4398 4399//===----------------------------------------------------------------------===// 4400// Scaled floating point to integer conversion instructions. 
4401//===----------------------------------------------------------------------===// 4402 4403defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; 4404defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; 4405defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; 4406defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; 4407defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; 4408defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; 4409defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; 4410defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; 4411defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4412defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4413defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4414defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4415 4416// AArch64's FCVT instructions saturate when out of range. 4417multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> { 4418 let Predicates = [HasFullFP16] in { 4419 def : Pat<(i32 (to_int_sat f16:$Rn, i32)), 4420 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4421 def : Pat<(i64 (to_int_sat f16:$Rn, i64)), 4422 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4423 } 4424 def : Pat<(i32 (to_int_sat f32:$Rn, i32)), 4425 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4426 def : Pat<(i64 (to_int_sat f32:$Rn, i64)), 4427 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4428 def : Pat<(i32 (to_int_sat f64:$Rn, i32)), 4429 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4430 def : Pat<(i64 (to_int_sat f64:$Rn, i64)), 4431 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4432 4433 let Predicates = [HasFullFP16] in { 4434 def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), 4435 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4436 def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), 4437 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4438 } 4439 def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), 4440 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4441 def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), 4442 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4443 def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), 4444 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4445 def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), 4446 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4447} 4448 4449defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">; 4450defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">; 4451 4452multiclass FPToIntegerIntPats<Intrinsic round, string INST> { 4453 let Predicates = [HasFullFP16] in { 4454 def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>; 4455 def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>; 4456 } 4457 def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>; 4458 def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>; 4459 def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>; 4460 def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>; 4461 4462 let Predicates = [HasFullFP16] in { 4463 def : Pat<(i32 (round (fmul f16:$Rn, 
fixedpoint_f16_i32:$scale))), 4464 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4465 def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), 4466 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4467 } 4468 def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), 4469 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4470 def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), 4471 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4472 def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), 4473 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4474 def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), 4475 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4476} 4477 4478defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">; 4479defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">; 4480 4481multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> { 4482 def : Pat<(i32 (to_int (round f32:$Rn))), 4483 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4484 def : Pat<(i64 (to_int (round f32:$Rn))), 4485 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4486 def : Pat<(i32 (to_int (round f64:$Rn))), 4487 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4488 def : Pat<(i64 (to_int (round f64:$Rn))), 4489 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4490 4491 // These instructions saturate like fp_to_[su]int_sat. 4492 let Predicates = [HasFullFP16] in { 4493 def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), 4494 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4495 def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), 4496 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4497 } 4498 def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), 4499 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4500 def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), 4501 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4502 def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)), 4503 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4504 def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), 4505 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4506} 4507 4508defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">; 4509defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">; 4510defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">; 4511defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">; 4512defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">; 4513defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">; 4514defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">; 4515defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">; 4516 4517 4518 4519let Predicates = [HasFullFP16] in { 4520 def : Pat<(i32 (any_lround f16:$Rn)), 4521 (FCVTASUWHr f16:$Rn)>; 4522 def : Pat<(i64 (any_lround f16:$Rn)), 4523 (FCVTASUXHr f16:$Rn)>; 4524 def : Pat<(i64 (any_llround f16:$Rn)), 4525 (FCVTASUXHr f16:$Rn)>; 4526} 4527def : Pat<(i32 (any_lround f32:$Rn)), 4528 (FCVTASUWSr f32:$Rn)>; 4529def : Pat<(i32 (any_lround f64:$Rn)), 4530 (FCVTASUWDr f64:$Rn)>; 4531def : Pat<(i64 (any_lround f32:$Rn)), 4532 (FCVTASUXSr f32:$Rn)>; 4533def : Pat<(i64 (any_lround f64:$Rn)), 4534 (FCVTASUXDr f64:$Rn)>; 4535def : Pat<(i64 (any_llround f32:$Rn)), 4536 (FCVTASUXSr f32:$Rn)>; 4537def : Pat<(i64 (any_llround f64:$Rn)), 4538 (FCVTASUXDr f64:$Rn)>; 4539 4540//===----------------------------------------------------------------------===// 4541// Scaled integer to floating point conversion instructions. 
4542//===----------------------------------------------------------------------===// 4543 4544defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; 4545defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; 4546 4547def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), 4548 (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>; 4549def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)), 4550 (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>; 4551def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)), 4552 (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>; 4553 4554def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)), 4555 (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>; 4556def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)), 4557 (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>; 4558def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)), 4559 (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>; 4560 4561def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)), 4562 (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>; 4563def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)), 4564 (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>; 4565def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)), 4566 (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>; 4567 4568def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), 4569 (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>; 4570def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)), 4571 (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>; 4572def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)), 4573 (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>; 4574 4575//===----------------------------------------------------------------------===// 4576// Unscaled integer to floating point conversion instruction. 
4577//===----------------------------------------------------------------------===// 4578 4579defm FMOV : UnscaledConversion<"fmov">; 4580 4581// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable 4582let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1, 4583 Predicates = [HasFPARMv8] in { 4584def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, 4585 Sched<[WriteF]>; 4586def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, 4587 Sched<[WriteF]>; 4588def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, 4589 Sched<[WriteF]>; 4590} 4591 4592// Similarly add aliases 4593def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, 4594 Requires<[HasFullFP16]>; 4595def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; 4596def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; 4597 4598def : Pat<(bf16 fpimm0), 4599 (FMOVH0)>; 4600 4601// Pattern for FP16 and BF16 immediates 4602let Predicates = [HasFullFP16] in { 4603 def : Pat<(f16 fpimm:$in), 4604 (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; 4605 4606 def : Pat<(bf16 fpimm:$in), 4607 (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>; 4608} 4609 4610//===----------------------------------------------------------------------===// 4611// Floating point conversion instruction. 4612//===----------------------------------------------------------------------===// 4613 4614defm FCVT : FPConversion<"fcvt">; 4615 4616//===----------------------------------------------------------------------===// 4617// Floating point single operand instructions. 4618//===----------------------------------------------------------------------===// 4619 4620defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; 4621defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; 4622defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; 4623defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; 4624defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; 4625defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; 4626defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; 4627defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; 4628 4629defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; 4630defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; 4631 4632let SchedRW = [WriteFDiv] in { 4633defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; 4634} 4635 4636let Predicates = [HasFRInt3264] in { 4637 defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>; 4638 defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>; 4639 defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>; 4640 defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; 4641} // HasFRInt3264 4642 4643// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions 4644def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))), 4645 (FRINT32ZDr FPR64:$Rn)>; 4646def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))), 4647 (FRINT64ZDr FPR64:$Rn)>; 4648def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))), 4649 (FRINT32XDr FPR64:$Rn)>; 4650def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))), 4651 (FRINT64XDr FPR64:$Rn)>; 4652 4653// Emitting strict_lrint as two instructions is valid as any exceptions that 4654// occur will happen in exactly one of the instructions (e.g. 
if the input is 4655// not an integer the inexact exception will happen in the FRINTX but not then 4656// in the FCVTZS as the output of FRINTX is an integer). 4657let Predicates = [HasFullFP16] in { 4658 def : Pat<(i32 (any_lrint f16:$Rn)), 4659 (FCVTZSUWHr (FRINTXHr f16:$Rn))>; 4660 def : Pat<(i64 (any_lrint f16:$Rn)), 4661 (FCVTZSUXHr (FRINTXHr f16:$Rn))>; 4662 def : Pat<(i64 (any_llrint f16:$Rn)), 4663 (FCVTZSUXHr (FRINTXHr f16:$Rn))>; 4664} 4665def : Pat<(i32 (any_lrint f32:$Rn)), 4666 (FCVTZSUWSr (FRINTXSr f32:$Rn))>; 4667def : Pat<(i32 (any_lrint f64:$Rn)), 4668 (FCVTZSUWDr (FRINTXDr f64:$Rn))>; 4669def : Pat<(i64 (any_lrint f32:$Rn)), 4670 (FCVTZSUXSr (FRINTXSr f32:$Rn))>; 4671def : Pat<(i64 (any_lrint f64:$Rn)), 4672 (FCVTZSUXDr (FRINTXDr f64:$Rn))>; 4673def : Pat<(i64 (any_llrint f32:$Rn)), 4674 (FCVTZSUXSr (FRINTXSr f32:$Rn))>; 4675def : Pat<(i64 (any_llrint f64:$Rn)), 4676 (FCVTZSUXDr (FRINTXDr f64:$Rn))>; 4677 4678//===----------------------------------------------------------------------===// 4679// Floating point two operand instructions. 4680//===----------------------------------------------------------------------===// 4681 4682defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; 4683let SchedRW = [WriteFDiv] in { 4684defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; 4685} 4686defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; 4687defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; 4688defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; 4689defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; 4690let SchedRW = [WriteFMul] in { 4691defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; 4692defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; 4693} 4694defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; 4695 4696multiclass FMULScalarFromIndexedLane0Patterns<string inst, 4697 string inst_f16_suffix, 4698 string inst_f32_suffix, 4699 string inst_f64_suffix, 4700 SDPatternOperator OpNode, 4701 list<Predicate> preds = []> { 4702 let Predicates = !listconcat(preds, [HasFullFP16]) in { 4703 def : Pat<(f16 (OpNode (f16 FPR16:$Rn), 4704 (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))), 4705 (!cast<Instruction>(inst # inst_f16_suffix) 4706 FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>; 4707 } 4708 let Predicates = preds in { 4709 def : Pat<(f32 (OpNode (f32 FPR32:$Rn), 4710 (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))), 4711 (!cast<Instruction>(inst # inst_f32_suffix) 4712 FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>; 4713 def : Pat<(f64 (OpNode (f64 FPR64:$Rn), 4714 (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))), 4715 (!cast<Instruction>(inst # inst_f64_suffix) 4716 FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>; 4717 } 4718} 4719 4720defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr", 4721 any_fmul>; 4722 4723// Match reassociated forms of FNMUL. 
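// Since (fneg a) * b == fneg (a * b), a multiply with one negated operand
// can be selected directly to FNMUL, which computes -(Rn * Rm), instead of
// a separate FNEG plus FMUL.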
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
          (FNMULHrr FPR16:$a, FPR16:$b)>,
          Requires<[HasFullFP16]>;
def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
          (FNMULSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
          (FNMULDrr FPR64:$a, FPR64:$b)>;

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.

// Here we handle first "a + (-b)*c", which maps to FMSUB:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now it's time for "(-a) + (-b)*c", which maps to FNMADD:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
4799//===----------------------------------------------------------------------===// 4800 4801defm FCSEL : FPCondSelect<"fcsel">; 4802 4803let Predicates = [HasFullFP16] in 4804def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)), 4805 (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>; 4806 4807// CSEL instructions providing f128 types need to be handled by a 4808// pseudo-instruction since the eventual code will need to introduce basic 4809// blocks and control flow. 4810let Predicates = [HasFPARMv8] in 4811def F128CSEL : Pseudo<(outs FPR128:$Rd), 4812 (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), 4813 [(set (f128 FPR128:$Rd), 4814 (AArch64csel FPR128:$Rn, FPR128:$Rm, 4815 (i32 imm:$cond), NZCV))]> { 4816 let Uses = [NZCV]; 4817 let usesCustomInserter = 1; 4818 let hasNoSchedulingInfo = 1; 4819} 4820 4821//===----------------------------------------------------------------------===// 4822// Instructions used for emitting unwind opcodes on ARM64 Windows. 4823//===----------------------------------------------------------------------===// 4824let isPseudo = 1 in { 4825 def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>; 4826 def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4827 def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4828 def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4829 def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4830 def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4831 def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4832 def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4833 def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4834 def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4835 def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4836 def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>; 4837 def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4838 def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>; 4839 def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4840 def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>; 4841 def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4842 def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>; 4843 def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4844 def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4845} 4846 4847// Pseudo instructions for Windows EH 4848//===----------------------------------------------------------------------===// 4849let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, 4850 isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in { 4851 def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; 4852 let usesCustomInserter = 1 in 4853 def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, 4854 Sched<[]>; 4855} 4856 4857// Pseudo instructions for homogeneous prolog/epilog 4858let isPseudo = 1 in { 4859 // Save CSRs in order, {FPOffset} 4860 def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4861 // Restore CSRs in order 4862 def HOM_Epilog : 
Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4863} 4864 4865//===----------------------------------------------------------------------===// 4866// Floating point immediate move. 4867//===----------------------------------------------------------------------===// 4868 4869let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 4870defm FMOV : FPMoveImmediate<"fmov">; 4871} 4872 4873let Predicates = [HasFullFP16] in { 4874 def : Pat<(bf16 fpimmbf16:$in), 4875 (FMOVHi (fpimm16XForm bf16:$in))>; 4876} 4877 4878//===----------------------------------------------------------------------===// 4879// Advanced SIMD two vector instructions. 4880//===----------------------------------------------------------------------===// 4881 4882defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", 4883 AArch64uabd>; 4884// Match UABDL in log2-shuffle patterns. 4885def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), 4886 (zext (v8i8 V64:$opB))))), 4887 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4888def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4889 (v8i16 (add (sub (zext (v8i8 V64:$opA)), 4890 (zext (v8i8 V64:$opB))), 4891 (AArch64vashr v8i16:$src, (i32 15))))), 4892 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4893def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4894 (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), 4895 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4896def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4897 (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4898 (zext (extract_high_v16i8 (v16i8 V128:$opB)))), 4899 (AArch64vashr v8i16:$src, (i32 15))))), 4900 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4901def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), 4902 (zext (v4i16 V64:$opB))))), 4903 (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; 4904def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), 4905 (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), 4906 (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; 4907def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), 4908 (zext (v2i32 V64:$opB))))), 4909 (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; 4910def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), 4911 (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), 4912 (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; 4913 4914defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; 4915defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; 4916defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; 4917defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; 4918defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; 4919defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; 4920defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; 4921defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; 4922defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; 4923defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>; 4924 4925def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), 4926 (CMLTv8i8rz V64:$Rn)>; 4927def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), 4928 (CMLTv4i16rz V64:$Rn)>; 4929def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), 4930 (CMLTv2i32rz V64:$Rn)>; 4931def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), 4932 (CMLTv16i8rz V128:$Rn)>; 4933def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), 4934 (CMLTv8i16rz V128:$Rn)>; 4935def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), 4936 (CMLTv4i32rz V128:$Rn)>; 4937def : 
Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), 4938 (CMLTv2i64rz V128:$Rn)>; 4939 4940defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 4941defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 4942defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 4943defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 4944defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 4945defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; 4946defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; 4947defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; 4948def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), 4949 (FCVTLv4i16 V64:$Rn)>; 4950def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), 4951 (i64 4)))), 4952 (FCVTLv8i16 V128:$Rn)>; 4953def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), 4954 (FCVTLv2i32 V64:$Rn)>; 4955def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))), 4956 (FCVTLv4i32 V128:$Rn)>; 4957def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), 4958 (FCVTLv4i16 V64:$Rn)>; 4959def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))), 4960 (FCVTLv8i16 V128:$Rn)>; 4961 4962defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; 4963defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; 4964defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; 4965defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; 4966defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; 4967def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), 4968 (FCVTNv4i16 V128:$Rn)>; 4969def : Pat<(concat_vectors V64:$Rd, 4970 (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), 4971 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4972def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), 4973 (FCVTNv2i32 V128:$Rn)>; 4974def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), 4975 (FCVTNv4i16 V128:$Rn)>; 4976def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), 4977 (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4978def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))), 4979 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4980defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; 4981defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; 4982defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", 4983 int_aarch64_neon_fcvtxn>; 4984defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; 4985defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; 4986 4987// AArch64's FCVT instructions saturate when out of range. 
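// Hence the saturating conversion nodes map directly onto the vector FCVTZ
// instructions with no extra clamping code; e.g. llvm.fptosi.sat.v4i32.v4f32
// can become a single "fcvtzs v0.4s, v0.4s".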
4988multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> { 4989 let Predicates = [HasFullFP16] in { 4990 def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)), 4991 (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>; 4992 def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)), 4993 (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>; 4994 } 4995 def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), 4996 (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>; 4997 def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)), 4998 (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>; 4999 def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)), 5000 (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>; 5001} 5002defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">; 5003defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">; 5004 5005def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; 5006def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; 5007def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; 5008def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; 5009def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; 5010 5011def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; 5012def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; 5013def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; 5014def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; 5015def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; 5016 5017defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; 5018defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; 5019defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; 5020defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; 5021defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; 5022defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; 5023defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; 5024defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; 5025defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; 5026 5027let Predicates = [HasFRInt3264] in { 5028 defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; 5029 defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>; 5030 defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>; 5031 defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>; 5032} // HasFRInt3264 5033 5034defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; 5035defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; 5036defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", 5037 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 5038defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; 5039// Aliases for MVN -> NOT. 
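// A minimal sketch of the saturating conversions matched above (illustrative
// IR; the function name is an assumption). Because the hardware already
// clamps out-of-range values, the fptosi.sat intrinsic maps directly onto a
// plain "fcvtzs v0.4s, v0.4s":
//
//   define <4 x i32> @fcvtzs_sat_example(<4 x float> %v) {
//     %r = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %v)
//     ret <4 x i32> %r
//   }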
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264

defm FRSQRTE : SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT   : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
defm NEG     : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)>>;
defm NOT     : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;

// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
                  BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))>>;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd", int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
                  BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))>>;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd", int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext, so it's easier to pull the patterns out of
// the definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF   : PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF : PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F   : PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80   : PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF : PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000 : PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;

// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN
//  with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
//  with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax(Vm, -128), 127))) ~> SQXTN2(Vd, Vn)
//  with reversed min/max
def : Pat<(v16i8 (concat_vectors
                    (v8i8 V64:$Vd),
                    (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                                       (v8i16 VImm7F)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v16i8 (concat_vectors
                    (v8i8 V64:$Vd),
                    (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                                       (v8i16 VImm80)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax(Vm, -32768), 32767))) ~> SQXTN2(Vd, Vn)
//  with reversed min/max
def : Pat<(v8i16 (concat_vectors
                    (v4i16 V64:$Vd),
                    (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                                        (v4i32 VImm7FFF)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v8i16 (concat_vectors
                    (v4i16 V64:$Vd),
                    (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                                        (v4i32 VImm8000)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
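// A minimal sketch of the saturating-truncate idiom matched above
// (illustrative IR; the function name is an assumption): clamp-then-truncate
// becomes a single "uqxtn v0.8b, v0.8h".
//
//   define <8 x i8> @uqxtn_example(<8 x i16> %v) {
//     %c = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %v,
//              <8 x i16> <i16 255, i16 255, i16 255, i16 255,
//                         i16 255, i16 255, i16 255, i16 255>)
//     %t = trunc <8 x i16> %c to <8 x i8>
//     ret <8 x i8> %t
//   }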
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)),
          (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
defm FABD    : SIMDThreeSameVectorFP<1, 1, 0b010, "fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)),
          (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)),
          (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b101, "facge", AArch64facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b101, "facgt", AArch64facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1, 0, 0b010, "faddp", AArch64faddp>;
defm FADD    : SIMDThreeSameVectorFP<0, 0, 0b010, "fadd", any_fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1, 0, 0b111, "fdiv", any_fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1, 0, 0b000, "fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0, 0, 0b000, "fmaxnm", any_fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1, 0, 0b110, "fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0, 0, 0b110, "fmax", any_fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1, 1, 0b000, "fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0, 1, 0b000, "fminnm", any_fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1, 1, 0b110, "fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0, 1, 0b110, "fmin", any_fminimum>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
                TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
                TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)>>;
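// A minimal sketch of the reordering described in the NOTE above
// (illustrative IR; names are assumptions). In IR the accumulator %acc is
// the *last* fma operand, but FMLA reads the accumulator from its tied
// destination register, so the PatFrag swaps it to the front. Roughly:
//
//   define <4 x float> @fmla_example(<4 x float> %acc, <4 x float> %a,
//                                    <4 x float> %b) {
//     %r = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b,
//                                           <4 x float> %acc)
//     ret <4 x float> %r   ; selects to: fmla v0.4s, v1.4s, v2.4s
//   }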
defm FMULX   : SIMDThreeSameVectorFP<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL    : SIMDThreeSameVectorFP<1, 0, 0b011, "fmul", any_fmul>;
defm FRECPS  : SIMDThreeSameVectorFP<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
defm FRSQRTS : SIMDThreeSameVectorFP<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB    : SIMDThreeSameVectorFP<0, 1, 0b010, "fsub", any_fsub>;

// MLA and MLS are generated in MachineCombine.
defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL    : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL   : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA   : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
                  TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))>>;
defm SABD   : SIMDThreeSameVectorBHS<0, 0b01110, "sabd", AArch64sabd>;
defm SHADD  : SIMDThreeSameVectorBHS<0, 0b00000, "shadd", avgfloors>;
defm SHSUB  : SIMDThreeSameVectorBHS<0, 0b00100, "shsub", int_aarch64_neon_shsub>;
defm SMAXP  : SIMDThreeSameVectorBHS<0, 0b10100, "smaxp", int_aarch64_neon_smaxp>;
defm SMAX   : SIMDThreeSameVectorBHS<0, 0b01100, "smax", smax>;
defm SMINP  : SIMDThreeSameVectorBHS<0, 0b10101, "sminp", int_aarch64_neon_sminp>;
defm SMIN   : SIMDThreeSameVectorBHS<0, 0b01101, "smin", smin>;
defm SQADD  : SIMDThreeSameVector<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL : SIMDThreeSameVector<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL  : SIMDThreeSameVector<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB  : SIMDThreeSameVector<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRHADD : SIMDThreeSameVectorBHS<0, 0b00010, "srhadd", avgceils>;
defm SRSHL  : SIMDThreeSameVector<0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL   : SIMDThreeSameVector<0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB    : SIMDThreeSameVector<1, 0b10000, "sub", sub>;
defm UABA   : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
                  TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))>>;
defm UABD   : SIMDThreeSameVectorBHS<1, 0b01110, "uabd", AArch64uabd>;
defm UHADD  : SIMDThreeSameVectorBHS<1, 0b00000, "uhadd", avgflooru>;
defm UHSUB  : SIMDThreeSameVectorBHS<1, 0b00100, "uhsub", int_aarch64_neon_uhsub>;
defm UMAXP  : SIMDThreeSameVectorBHS<1, 0b10100, "umaxp", int_aarch64_neon_umaxp>;
defm UMAX   : SIMDThreeSameVectorBHS<1, 0b01100, "umax", umax>;
defm UMINP  : SIMDThreeSameVectorBHS<1, 0b10101, "uminp", int_aarch64_neon_uminp>;
defm UMIN   : SIMDThreeSameVectorBHS<1, 0b01101, "umin", umin>;
defm UQADD  : SIMDThreeSameVector<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL : SIMDThreeSameVector<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL  : SIMDThreeSameVector<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB  : SIMDThreeSameVector<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URHADD : SIMDThreeSameVectorBHS<1, 0b00010, "urhadd", avgceilu>;
defm URSHL  : SIMDThreeSameVector<1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL   : SIMDThreeSameVector<1, 0b01000, "ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1, 0b10000, "sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1, 0b10001, "sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturate patterns, other than the intrinsic matches above.
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
               BinOpFrag<(and node:$LHS, (vnot node:$RHS))>>;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
               BinOpFrag<(or node:$LHS, (vnot node:$RHS))>>;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<
               TriOpFrag<(or (and node:$LHS, node:$MHS),
                             (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
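// A minimal sketch of the bitwise select that BSP models (illustrative IR;
// names are assumptions): (m & a) | (~m & b). After register allocation the
// pseudo becomes BSL, BIT or BIF depending on which operand ends up sharing
// the destination register; if the mask register is reused, this is roughly
// "bsl v0.8b, v1.8b, v2.8b".
//
//   define <8 x i8> @bsp_example(<8 x i8> %m, <8 x i8> %a, <8 x i8> %b) {
//     %t0 = and <8 x i8> %m, %a
//     %nm = xor <8 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1,
//                             i8 -1, i8 -1, i8 -1, i8 -1>
//     %t1 = and <8 x i8> %nm, %b
//     %r  = or <8 x i8> %t0, %t1
//     ret <8 x i8> %r
//   }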
"|cmls.16b\t$dst, $src1, $src2}", 5329 (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5330def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # 5331 "|cmls.4h\t$dst, $src1, $src2}", 5332 (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5333def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # 5334 "|cmls.8h\t$dst, $src1, $src2}", 5335 (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5336def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # 5337 "|cmls.2s\t$dst, $src1, $src2}", 5338 (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5339def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # 5340 "|cmls.4s\t$dst, $src1, $src2}", 5341 (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5342def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # 5343 "|cmls.2d\t$dst, $src1, $src2}", 5344 (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5345 5346def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # 5347 "|cmlo.8b\t$dst, $src1, $src2}", 5348 (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 5349def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # 5350 "|cmlo.16b\t$dst, $src1, $src2}", 5351 (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5352def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # 5353 "|cmlo.4h\t$dst, $src1, $src2}", 5354 (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5355def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # 5356 "|cmlo.8h\t$dst, $src1, $src2}", 5357 (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5358def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # 5359 "|cmlo.2s\t$dst, $src1, $src2}", 5360 (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5361def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # 5362 "|cmlo.4s\t$dst, $src1, $src2}", 5363 (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5364def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # 5365 "|cmlo.2d\t$dst, $src1, $src2}", 5366 (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5367 5368def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # 5369 "|cmle.8b\t$dst, $src1, $src2}", 5370 (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 5371def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # 5372 "|cmle.16b\t$dst, $src1, $src2}", 5373 (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5374def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # 5375 "|cmle.4h\t$dst, $src1, $src2}", 5376 (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5377def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # 5378 "|cmle.8h\t$dst, $src1, $src2}", 5379 (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5380def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # 5381 "|cmle.2s\t$dst, $src1, $src2}", 5382 (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5383def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # 5384 "|cmle.4s\t$dst, $src1, $src2}", 5385 (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5386def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # 5387 "|cmle.2d\t$dst, $src1, $src2}", 5388 (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5389 5390def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # 5391 "|cmlt.8b\t$dst, $src1, $src2}", 5392 (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 5393def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # 5394 "|cmlt.16b\t$dst, $src1, $src2}", 5395 (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5396def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # 5397 "|cmlt.4h\t$dst, $src1, $src2}", 5398 (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5399def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # 5400 "|cmlt.8h\t$dst, $src1, $src2}", 5401 
(CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5402def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # 5403 "|cmlt.2s\t$dst, $src1, $src2}", 5404 (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5405def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # 5406 "|cmlt.4s\t$dst, $src1, $src2}", 5407 (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5408def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # 5409 "|cmlt.2d\t$dst, $src1, $src2}", 5410 (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5411 5412let Predicates = [HasNEON, HasFullFP16] in { 5413def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # 5414 "|fcmle.4h\t$dst, $src1, $src2}", 5415 (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5416def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # 5417 "|fcmle.8h\t$dst, $src1, $src2}", 5418 (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5419} 5420def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # 5421 "|fcmle.2s\t$dst, $src1, $src2}", 5422 (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5423def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # 5424 "|fcmle.4s\t$dst, $src1, $src2}", 5425 (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5426def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # 5427 "|fcmle.2d\t$dst, $src1, $src2}", 5428 (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5429 5430let Predicates = [HasNEON, HasFullFP16] in { 5431def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # 5432 "|fcmlt.4h\t$dst, $src1, $src2}", 5433 (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5434def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # 5435 "|fcmlt.8h\t$dst, $src1, $src2}", 5436 (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5437} 5438def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # 5439 "|fcmlt.2s\t$dst, $src1, $src2}", 5440 (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5441def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # 5442 "|fcmlt.4s\t$dst, $src1, $src2}", 5443 (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5444def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # 5445 "|fcmlt.2d\t$dst, $src1, $src2}", 5446 (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5447 5448let Predicates = [HasNEON, HasFullFP16] in { 5449def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # 5450 "|facle.4h\t$dst, $src1, $src2}", 5451 (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5452def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # 5453 "|facle.8h\t$dst, $src1, $src2}", 5454 (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5455} 5456def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # 5457 "|facle.2s\t$dst, $src1, $src2}", 5458 (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5459def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # 5460 "|facle.4s\t$dst, $src1, $src2}", 5461 (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5462def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # 5463 "|facle.2d\t$dst, $src1, $src2}", 5464 (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5465 5466let Predicates = [HasNEON, HasFullFP16] in { 5467def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # 5468 "|faclt.4h\t$dst, $src1, $src2}", 5469 (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5470def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # 5471 "|faclt.8h\t$dst, $src1, $src2}", 5472 (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5473} 5474def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # 5475 "|faclt.2s\t$dst, $src1, $src2}", 5476 (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5477def : 
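// For illustration (example assembly, not a definition): the aliases above
// let the assembler accept the missing "less-than"-style comparison
// spellings by swapping the source operands, so
//
//   cmls v0.8b, v1.8b, v2.8b
//
// is encoded exactly as
//
//   cmhs v0.8b, v2.8b, v1.8b
//
// The trailing 0 on each alias keeps the disassembler printing the
// canonical cmhs/cmhi/cmge/cmgt (and fcmge/fcmgt) forms.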
InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # 5478 "|faclt.4s\t$dst, $src1, $src2}", 5479 (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5480def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # 5481 "|faclt.2d\t$dst, $src1, $src2}", 5482 (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5483 5484//===----------------------------------------------------------------------===// 5485// Advanced SIMD three scalar instructions. 5486//===----------------------------------------------------------------------===// 5487 5488defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; 5489defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; 5490defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; 5491defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; 5492defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; 5493defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; 5494defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; 5495defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; 5496def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 5497 (FABD64 FPR64:$Rn, FPR64:$Rm)>; 5498let Predicates = [HasNEON, HasFullFP16] in { 5499def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; 5500} 5501let Predicates = [HasNEON] in { 5502def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; 5503def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; 5504} 5505defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", 5506 int_aarch64_neon_facge>; 5507defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", 5508 int_aarch64_neon_facgt>; 5509defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; 5510defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; 5511defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; 5512defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>; 5513defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>; 5514defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>; 5515defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; 5516defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; 5517defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 5518defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; 5519defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; 5520defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; 5521defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; 5522defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; 5523defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; 5524defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; 5525defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; 5526defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; 5527defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; 5528defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; 5529defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; 5530let Predicates = [HasRDM] in { 5531 defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; 5532 defm 
SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; 5533 def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn), 5534 (i32 FPR32:$Rm))), 5535 (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5536 def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn), 5537 (i32 FPR32:$Rm))), 5538 (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5539} 5540 5541defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64", 5542 int_aarch64_neon_fmulx, 5543 [HasNEONorSME]>; 5544 5545def : InstAlias<"cmls $dst, $src1, $src2", 5546 (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5547def : InstAlias<"cmle $dst, $src1, $src2", 5548 (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5549def : InstAlias<"cmlo $dst, $src1, $src2", 5550 (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5551def : InstAlias<"cmlt $dst, $src1, $src2", 5552 (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5553def : InstAlias<"fcmle $dst, $src1, $src2", 5554 (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5555def : InstAlias<"fcmle $dst, $src1, $src2", 5556 (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5557def : InstAlias<"fcmlt $dst, $src1, $src2", 5558 (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5559def : InstAlias<"fcmlt $dst, $src1, $src2", 5560 (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5561def : InstAlias<"facle $dst, $src1, $src2", 5562 (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5563def : InstAlias<"facle $dst, $src1, $src2", 5564 (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5565def : InstAlias<"faclt $dst, $src1, $src2", 5566 (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5567def : InstAlias<"faclt $dst, $src1, $src2", 5568 (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5569 5570//===----------------------------------------------------------------------===// 5571// Advanced SIMD three scalar instructions (mixed operands). 5572//===----------------------------------------------------------------------===// 5573defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", 5574 int_aarch64_neon_sqdmulls_scalar>; 5575defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; 5576defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; 5577 5578def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), 5579 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 5580 (i32 FPR32:$Rm))))), 5581 (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5582def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), 5583 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 5584 (i32 FPR32:$Rm))))), 5585 (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5586 5587//===----------------------------------------------------------------------===// 5588// Advanced SIMD two scalar instructions. 
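// A minimal sketch of the fusion above (illustrative IR; the function name
// is an assumption): a saturating doubling multiply-long followed by a
// saturating accumulate folds into a single "sqdmlal Dd, Sn, Sm" (modulo any
// moves between the register files).
//
//   define i64 @sqdmlal_example(i64 %acc, i32 %a, i32 %b) {
//     %m = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
//     %r = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %acc, i64 %m)
//     ret i64 %r
//   }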
//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ   : SIMDCmpTwoScalarD<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD<0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<1, 1, 0b11011, "fcvtzu">;
defm FRECPE  : SIMDFPTwoScalar<0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX  : SIMDFPTwoScalar<0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar<1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm NEG    : SIMDTwoScalarD<1, 0b01011, "neg",
                             UnOpFrag<(sub immAllZerosV, node:$LHS)>>;
defm SCVTF  : SIMDFPTwoScalarCVT<0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS<0, 0b10100, "sqxtn",
                                    int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS<1, 0b10010, "sqxtun",
                                    int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied<0, 0b00011, "suqadd",
                                    int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn",
                                    int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied<1, 0b00011, "usqadd",
                                    int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns for which we want to keep
// the int values in FP registers, using the corresponding NEON instructions
// to avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bit values to float.
// 8 bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                      (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                      sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                      (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                      sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat<(f32 (uint_to_fp (i32
              (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
          (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat<(f32 (uint_to_fp (i32
              (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
          (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16 bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat<(f32 (uint_to_fp (i32
              (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
          (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat<(f32 (uint_to_fp (i32
              (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
          (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32 bits are handled in target specific dag combine:
// performIntToFpCombine.
// Converting a 64-bit integer to a 32-bit floating point value is not
// possible with a single UCVTF on floating point registers, since source
// and destination must have the same size.

// Here are the patterns for 8, 16, 32, and 64-bit values to double.
// 8 bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat<(f64 (uint_to_fp (i32
              (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
          (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat<(f64 (uint_to_fp (i32
              (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
          (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16 bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat<(f64 (uint_to_fp (i32
              (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
          (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat<(f64 (uint_to_fp (i32
              (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
          (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32 bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat<(f64 (uint_to_fp (i32
              (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
          (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat<(f64 (uint_to_fp (i32
              (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
          (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64 bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
} // let Predicates = [HasNEON]
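// A minimal sketch of what the load patterns above avoid (illustrative IR;
// names are assumptions). Without them, the byte would be loaded into a GPR,
// zero-extended, and then moved across to the FP unit; with them, it is
// loaded straight into a b register and converted there, roughly
// "ldr b0, [x0]" followed by "ucvtf s0, s0":
//
//   define float @u8_to_f32_example(ptr %p) {
//     %b = load i8, ptr %p
//     %w = zext i8 %b to i32
//     %f = uitofp i32 %w to float
//     ret float %f
//   }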
//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0, 0b0100, "addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0, 0b0110, "subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1, 0b0100, "raddhn", int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1, 0b0110, "rsubhn", int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0, 0b1110, "pmull", AArch64pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0, 0b0101, "sabal", AArch64sabd>;
defm SABDL  : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", AArch64sabd>;
defm SADDL  : SIMDLongThreeVectorBHS<0, 0b0000, "saddl",
                  BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW  : SIMDWideThreeVectorBHS<0, 0b0001, "saddw",
                  BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL  : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
                  TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL  : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
                  TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL  : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL  : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                  BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW  : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                  BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL  : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", AArch64uabd>;
defm UADDL  : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                  BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW  : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                  BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL  : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
                  TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL  : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
                  TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL  : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL  : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                  BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW  : SIMDWideThreeVectorBHS<1, 0b0011, "usubw",
                  BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;

// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode,
                                       SDPatternOperator vecopnode,
                                       Instruction INST8B, Instruction INST4H,
                                       Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                      V64:$Ra,
                      (v4i16 (extract_subvector
                                (vecopnode (v8i8 V64:$Rn), (v8i8 V64:$Rm)),
                                (i64 0))))),
            (EXTRACT_SUBREG (v8i16 (INST8B
                (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                      V64:$Ra,
                      (v2i32 (extract_subvector
                                (vecopnode (v4i16 V64:$Rn), (v4i16 V64:$Rm)),
                                (i64 0))))),
            (EXTRACT_SUBREG (v4i32 (INST4H
                (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                      V64:$Ra,
                      (v1i64 (extract_subvector
                                (vecopnode (v2i32 V64:$Rn), (v2i32 V64:$Rm)),
                                (i64 0))))),
            (EXTRACT_SUBREG (v2i64 (INST2S
                (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                V64:$Rn, V64:$Rm)), dsub)>;
}

defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
          UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
          SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
          UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
          SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;

multiclass Neon_addl_extract_patterns<SDPatternOperator opnode,
                                      SDPatternOperator ext, string Inst> {
  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16")
                                       V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32")
                                       V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64")
                                       V64:$Rn, V64:$Rm)), dsub)>;

  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16")
                (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub),
                V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32")
                (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub),
                V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64")
                (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub),
                V64:$Rm)), dsub)>;
}

defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
defm : Neon_addl_extract_patterns<add, sext, "SADD">;
defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
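// For example (illustrative IR; the function name is an assumption), this
// high-half narrowing add is matched by the first ADDHN pattern below and
// selects to "addhn v0.8b, v0.8h, v1.8h":
//
//   define <8 x i8> @addhn_example(<8 x i16> %a, <8 x i16> %b) {
//     %s = add <8 x i16> %a, %b
//     %h = lshr <8 x i16> %s, <i16 8, i16 8, i16 8, i16 8,
//                              i16 8, i16 8, i16 8, i16 8>
//     %t = trunc <8 x i16> %h to <8 x i8>
//     ret <8 x i8> %t
//   }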
// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate.
  // The top half of the other operand will be unset, but that doesn't matter
  // as it will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;
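// A minimal sketch of the extract_subvector case handled above (illustrative
// IR; names are assumptions): taking the high half of a 128-bit vector
// becomes a self-EXT by 8 bytes, "ext v0.16b, v0.16b, v0.16b, #8".
//
//   define <8 x i8> @high_half_example(<16 x i8> %v) {
//     %h = shufflevector <16 x i8> %v, <16 x i8> poison,
//                        <8 x i32> <i32 8, i32 9, i32 10, i32 11,
//                                   i32 12, i32 13, i32 14, i32 15>
//     ret <8 x i8> %h
//   }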
6073def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))), 6074 (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))), 6075 (UZP1v16i8 V128:$Vn, V128:$Vm)>; 6076def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))), 6077 (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))), 6078 (UZP1v8i16 V128:$Vn, V128:$Vm)>; 6079def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))), 6080 (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))), 6081 (UZP1v4i32 V128:$Vn, V128:$Vm)>; 6082 6083def : Pat<(v16i8 (concat_vectors 6084 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), 6085 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), 6086 (UZP2v16i8 V128:$Vn, V128:$Vm)>; 6087def : Pat<(v8i16 (concat_vectors 6088 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))), 6089 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), 6090 (UZP2v8i16 V128:$Vn, V128:$Vm)>; 6091def : Pat<(v4i32 (concat_vectors 6092 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), 6093 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), 6094 (UZP2v4i32 V128:$Vn, V128:$Vm)>; 6095 6096//---------------------------------------------------------------------------- 6097// AdvSIMD TBL/TBX instructions 6098//---------------------------------------------------------------------------- 6099 6100defm TBL : SIMDTableLookup< 0, "tbl">; 6101defm TBX : SIMDTableLookupTied<1, "tbx">; 6102 6103def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 6104 (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; 6105def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 6106 (TBLv16i8One V128:$Ri, V128:$Rn)>; 6107 6108def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), 6109 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 6110 (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; 6111def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), 6112 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 6113 (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; 6114 6115//---------------------------------------------------------------------------- 6116// AdvSIMD LUT instructions 6117//---------------------------------------------------------------------------- 6118let Predicates = [HasLUT] in { 6119 defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">; 6120 defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">; 6121} 6122 6123//---------------------------------------------------------------------------- 6124// AdvSIMD scalar DUP instruction 6125//---------------------------------------------------------------------------- 6126 6127defm DUP : SIMDScalarDUP<"mov">; 6128 6129//---------------------------------------------------------------------------- 6130// AdvSIMD scalar pairwise instructions 6131//---------------------------------------------------------------------------- 6132 6133defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; 6134defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; 6135defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; 6136defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; 6137defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; 6138defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; 6139 6140// Only the lower half of the result of the inner FADDP is used in the patterns 6141// below, so the second operand does not matter. Re-use the first input 6142// operand, so no additional dependencies need to be introduced. 
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
          (FADDPv2i16p
            (EXTRACT_SUBREG
               (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
               dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, V128:$Rn),
              dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
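// For example, duplicating the truncated value of 32-bit lane 1 of a v4i32
// into all bytes of a v8i8 can be done with a byte-lane DUP from lane 4:
// byte lane 4*idx occupies the low 8 bits of 32-bit lane idx, which is all a
// truncation keeps. The VecIndex_xN transforms below do this index scaling.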
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                        imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                       imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                         VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                         VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 is a legal type
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                       VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                       VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16,  v4f16,  f16,  INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32,  v2f32,  f32,  INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64,  v1f64,  f64,  INSvi64lane>;

defm : Neon_INS_elt_pattern<v16i8, v8i8,  i32, INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;

// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (EXTRACT_SUBREG
            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
            dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
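// These mappings keep the scalar in the FP/SIMD register file: the lane write
// is done with INS (element) directly from the source register, instead of
// round-tripping the bits through a GPR for INS (general).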

// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, dsub)>;

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which may as well
// be INS.
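// For instance, v4i32 (concat_vectors V64:$Rd, V64:$Rn) becomes a single
// "mov Vd.d[1], Vn.d[0]" once register allocation has placed $Rd in the low
// 64 bits of Vd.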
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;

multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;

// These patterns are used by GlobalISel.
multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
}

defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;

// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
            dsub))>;

def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;

def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;

multiclass SIMDAcrossLaneLongReductionIntrinsic<string Opc, SDPatternOperator addlv> {
  def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i8v") V64:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i16v") V64:$Rn), ssub))>;

  def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v16i8v") V128:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i16v") V128:$Rn), ssub))>;

  def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))),
            (v2i64 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i32v") V128:$Rn), dsub))>;
}

defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>;
defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>;

// Patterns for across-vector intrinsics that have a node equivalent which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
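// For example, with baseOpc "ADDV" the v8i8 case below selects to
// "addv bD, vN.8b"; the scalar result already sits in lane 0 of vD, which is
// the only lane these nodes define.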
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
                                                        (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
                                 (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;

}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
                    maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
                    maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            ssub))>;
}

// For vecreduce_add, used by GlobalISel, not SDAG.
def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
          (i8 (ADDVv8i8v V64:$Rn))>;
def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
          (i8 (ADDVv16i8v V128:$Rn))>;
def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
          (i16 (ADDVv4i16v V64:$Rn))>;
def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
          (i16 (ADDVv8i16v V128:$Rn))>;
def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
          (i32 (ADDVv4i32v V128:$Rn))>;
def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
          (i64 (ADDPv2i64p V128:$Rn))>;

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;

// For vecreduce_{opc}, used by GlobalISel but not SDAG at the moment,
// because GlobalISel allows us to specify the return register to be an FPR.
multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
                                                SDPatternOperator opNode> {
def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>;

def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>;

def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>;

def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;

def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
}

// For v2i32 source type, the pairwise instruction can be used instead.
defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              (i64 0)))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              (i64 0)))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".2d",
                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
                                                "fmov", ".2s",
                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".4s",
                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
                                                "fmov", ".4h",
                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                                "fmov", ".8h",
                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                                         [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                              simdimmtype10,
                                              "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

let Predicates = [HasNEON] in {
  // Using the MOVI to materialize fp constants.
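  // A SIMDModImmType4 constant is an f32 whose 32-bit pattern is imm8 << 24;
  // for instance 2.0f (0x40000000) becomes "movi v0.2s, #0x40, lsl #24" and
  // the f32 result is then read back from the ssub subregister.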
  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
                                       (i32 24)),
                            ssub)>;
}

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
                                                "movi", ".8b",
                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                                "movi", ".16b",
                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
  defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                     (v4f32 (insert_subvector undef,
                                                (v2f32 (fneg V64:$Rm)),
                                                (i64 0))),
                                     VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                     (v4f32 (insert_subvector undef,
                                                (v2f32 (fneg V64:$Rm)),
                                                (i64 0))),
                                     VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                             VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;

def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                     int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;
// Generated by MachineCombiner.
defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                            (vector_extract (v4i32 V128:$Vm),
                                                            VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
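// For reference, "fcvtzs s0, s1, #3" treats the immediate as the number of
// fractional bits: it computes the fixed-point value trunc(x * 2^3).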
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;

// Patterns for the FP16 intrinsics - these need a register copy to/from the
// FPR16 subregister, since plain i16 is not a legal type.

def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
                (and FPR32:$Rn, (i32 65535)),
                vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGE16
FPR16:$Rn, FPR16:$Rm), 7434 hsub))>; 7435def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), 7436 (i32 (INSERT_SUBREG 7437 (i32 (IMPLICIT_DEF)), 7438 (FACGT16 FPR16:$Rn, FPR16:$Rm), 7439 hsub))>; 7440 7441defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; 7442defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; 7443defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", 7444 int_aarch64_neon_sqrshrn>; 7445defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", 7446 int_aarch64_neon_sqrshrun>; 7447defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 7448defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 7449defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", 7450 int_aarch64_neon_sqshrn>; 7451defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", 7452 int_aarch64_neon_sqshrun>; 7453defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; 7454defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; 7455defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", 7456 TriOpFrag<(add node:$LHS, 7457 (AArch64srshri node:$MHS, node:$RHS))>>; 7458defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; 7459defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", 7460 TriOpFrag<(add_and_or_is_add node:$LHS, 7461 (AArch64vashr node:$MHS, node:$RHS))>>; 7462defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", 7463 int_aarch64_neon_uqrshrn>; 7464defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 7465defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", 7466 int_aarch64_neon_uqshrn>; 7467defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; 7468defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", 7469 TriOpFrag<(add node:$LHS, 7470 (AArch64urshri node:$MHS, node:$RHS))>>; 7471defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; 7472defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", 7473 TriOpFrag<(add_and_or_is_add node:$LHS, 7474 (AArch64vlshr node:$MHS, node:$RHS))>>; 7475 7476//---------------------------------------------------------------------------- 7477// AdvSIMD vector shift instructions 7478//---------------------------------------------------------------------------- 7479defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; 7480defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; 7481defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", 7482 int_aarch64_neon_vcvtfxs2fp>; 7483defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>; 7484defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; 7485 7486// X << 1 ==> X + X 7487class SHLToADDPat<ValueType ty, RegisterClass regtype> 7488 : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))), 7489 (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>; 7490 7491def : SHLToADDPat<v16i8, FPR128>; 7492def : SHLToADDPat<v8i16, FPR128>; 7493def : SHLToADDPat<v4i32, FPR128>; 7494def : SHLToADDPat<v2i64, FPR128>; 7495def : SHLToADDPat<v8i8, FPR64>; 7496def : SHLToADDPat<v4i16, FPR64>; 7497def : SHLToADDPat<v2i32, FPR64>; 7498 7499defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", 7500 BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; 7501defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>; 7502def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 7503 (i32 vecshiftL64:$imm))), 7504 (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; 7505defm SQRSHRN : 
SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", 7506 int_aarch64_neon_sqrshrn>; 7507defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", 7508 int_aarch64_neon_sqrshrun>; 7509defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 7510defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 7511defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", 7512 int_aarch64_neon_sqshrn>; 7513defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", 7514 int_aarch64_neon_sqshrun>; 7515defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>; 7516def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 7517 (i32 vecshiftR64:$imm))), 7518 (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; 7519defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; 7520defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", 7521 TriOpFrag<(add node:$LHS, 7522 (AArch64srshri node:$MHS, node:$RHS))> >; 7523defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", 7524 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; 7525 7526defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; 7527defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", 7528 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; 7529defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", 7530 int_aarch64_neon_vcvtfxu2fp>; 7531defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", 7532 int_aarch64_neon_uqrshrn>; 7533defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 7534defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", 7535 int_aarch64_neon_uqshrn>; 7536defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; 7537defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", 7538 TriOpFrag<(add node:$LHS, 7539 (AArch64urshri node:$MHS, node:$RHS))> >; 7540defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", 7541 BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; 7542defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; 7543defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", 7544 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; 7545 7546// RADDHN patterns for when RSHRN shifts by half the size of the vector element 7547def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))), 7548 (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; 7549def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))), 7550 (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; 7551let AddedComplexity = 5 in 7552def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))), 7553 (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; 7554def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), 7555 (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; 7556def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))), 7557 (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; 7558def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))), 7559 (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; 7560 7561// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element 7562def : Pat<(v16i8 (concat_vectors 7563 (v8i8 V64:$Vd), 7564 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift 
(i32 128), (i32 0))), (i32 8)))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;

// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
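// For example (illustrative, register numbers arbitrary):
//   (v8i16 (sext (v8i8 V0)))   ==> sshll v0.8h, v0.8b, #0
//   (v8i16 (zext (v8i8 V0)))   ==> ushll v0.8h, v0.8b, #0
// The sxtl/uxtl aliases further down give these zero-shift forms their
// conventional spelling.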
7628def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; 7629def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7630def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7631def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; 7632def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7633def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7634def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; 7635def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7636def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7637// Also match an extend from the upper half of a 128 bit source register. 7638def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7639 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7640def : Pat<(v8i16 (zext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7641 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7642def : Pat<(v8i16 (sext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7643 (SSHLLv16i8_shift V128:$Rn, (i32 0))>; 7644def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7645 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7646def : Pat<(v4i32 (zext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7647 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7648def : Pat<(v4i32 (sext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7649 (SSHLLv8i16_shift V128:$Rn, (i32 0))>; 7650def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7651 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7652def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7653 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7654def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7655 (SSHLLv4i32_shift V128:$Rn, (i32 0))>; 7656 7657// Vector shift sxtl aliases 7658def : InstAlias<"sxtl.8h $dst, $src1", 7659 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7660def : InstAlias<"sxtl $dst.8h, $src1.8b", 7661 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7662def : InstAlias<"sxtl.4s $dst, $src1", 7663 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7664def : InstAlias<"sxtl $dst.4s, $src1.4h", 7665 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7666def : InstAlias<"sxtl.2d $dst, $src1", 7667 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7668def : InstAlias<"sxtl $dst.2d, $src1.2s", 7669 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7670 7671// Vector shift sxtl2 aliases 7672def : InstAlias<"sxtl2.8h $dst, $src1", 7673 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7674def : InstAlias<"sxtl2 $dst.8h, $src1.16b", 7675 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7676def : InstAlias<"sxtl2.4s $dst, $src1", 7677 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7678def : InstAlias<"sxtl2 $dst.4s, $src1.8h", 7679 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7680def : InstAlias<"sxtl2.2d $dst, $src1", 7681 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7682def : InstAlias<"sxtl2 $dst.2d, $src1.4s", 7683 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7684 7685// Vector shift uxtl aliases 7686def : InstAlias<"uxtl.8h $dst, $src1", 7687 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7688def : InstAlias<"uxtl $dst.8h, $src1.8b", 7689 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7690def : InstAlias<"uxtl.4s $dst, $src1", 7691 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7692def : InstAlias<"uxtl $dst.4s, $src1.4h", 7693 (USHLLv4i16_shift V128:$dst, 
V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bits -> float. 2 sizes step-up.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (f64
                                (EXTRACT_SUBREG
                                  (SSHLLv8i8_shift
                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                        INST,
                                        bsub),
                                    0),
                                  dsub)),
                              0),
                            ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                  INST,
                                  hsub),
                                0),
                            ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bits to 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
7774// 64-bits integer to 32-bits floating point, not possible with 7775// SCVTF on floating point registers (both source and destination 7776// must have the same size). 7777 7778// Here are the patterns for 8, 16, 32, and 64-bits to double. 7779// 8-bits -> double. 3 size step-up: give up. 7780// 16-bits -> double. 2 size step. 7781class SExtLoadi16CVTf64Pat<dag addrmode, dag INST> 7782 : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), 7783 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7784 (SSHLLv2i32_shift 7785 (f64 7786 (EXTRACT_SUBREG 7787 (SSHLLv4i16_shift 7788 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7789 INST, 7790 hsub), 7791 0), 7792 dsub)), 7793 0), 7794 dsub)))>, 7795 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7796 7797def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), 7798 (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; 7799def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), 7800 (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; 7801def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), 7802 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 7803def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), 7804 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 7805// 32-bits -> double. 1 size step-up. 7806class SExtLoadi32CVTf64Pat<dag addrmode, dag INST> 7807 : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), 7808 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7809 (SSHLLv2i32_shift 7810 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7811 INST, 7812 ssub), 7813 0), 7814 dsub)))>, 7815 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7816 7817def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), 7818 (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; 7819def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), 7820 (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; 7821def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), 7822 (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; 7823def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), 7824 (LDURSi GPR64sp:$Rn, simm9:$offset)>; 7825 7826// 64-bits -> double are handled in target specific dag combine: 7827// performIntToFpCombine. 
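// For example (illustrative sketch of the 8-bit -> float case above,
// register numbers arbitrary):
//   ldr   b0, [x0]           // load the byte straight into an FPR
//   sshll v0.8h, v0.8b, #0   // sign extend 8 -> 16
//   sshll v0.4s, v0.4h, #0   // sign extend 16 -> 32
//   scvtf s0, s0             // FPR -> FPR convert
// instead of an ldrsb plus the slower GPR -> FPR "scvtf s0, w0".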
7828 7829 7830//---------------------------------------------------------------------------- 7831// AdvSIMD Load-Store Structure 7832//---------------------------------------------------------------------------- 7833defm LD1 : SIMDLd1Multiple<"ld1">; 7834defm LD2 : SIMDLd2Multiple<"ld2">; 7835defm LD3 : SIMDLd3Multiple<"ld3">; 7836defm LD4 : SIMDLd4Multiple<"ld4">; 7837 7838defm ST1 : SIMDSt1Multiple<"st1">; 7839defm ST2 : SIMDSt2Multiple<"st2">; 7840defm ST3 : SIMDSt3Multiple<"st3">; 7841defm ST4 : SIMDSt4Multiple<"st4">; 7842 7843class Ld1Pat<ValueType ty, Instruction INST> 7844 : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; 7845 7846def : Ld1Pat<v16i8, LD1Onev16b>; 7847def : Ld1Pat<v8i16, LD1Onev8h>; 7848def : Ld1Pat<v4i32, LD1Onev4s>; 7849def : Ld1Pat<v2i64, LD1Onev2d>; 7850def : Ld1Pat<v8i8, LD1Onev8b>; 7851def : Ld1Pat<v4i16, LD1Onev4h>; 7852def : Ld1Pat<v2i32, LD1Onev2s>; 7853def : Ld1Pat<v1i64, LD1Onev1d>; 7854 7855class St1Pat<ValueType ty, Instruction INST> 7856 : Pat<(store ty:$Vt, GPR64sp:$Rn), 7857 (INST ty:$Vt, GPR64sp:$Rn)>; 7858 7859def : St1Pat<v16i8, ST1Onev16b>; 7860def : St1Pat<v8i16, ST1Onev8h>; 7861def : St1Pat<v4i32, ST1Onev4s>; 7862def : St1Pat<v2i64, ST1Onev2d>; 7863def : St1Pat<v8i8, ST1Onev8b>; 7864def : St1Pat<v4i16, ST1Onev4h>; 7865def : St1Pat<v2i32, ST1Onev2s>; 7866def : St1Pat<v1i64, ST1Onev1d>; 7867 7868//--- 7869// Single-element 7870//--- 7871 7872defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; 7873defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; 7874defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; 7875defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; 7876let mayLoad = 1, hasSideEffects = 0 in { 7877defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; 7878defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; 7879defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; 7880defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; 7881defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; 7882defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; 7883defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; 7884defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; 7885defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; 7886defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; 7887defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; 7888defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; 7889defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; 7890defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; 7891defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; 7892defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; 7893} 7894 7895def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7896 (LD1Rv8b GPR64sp:$Rn)>; 7897def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7898 (LD1Rv16b GPR64sp:$Rn)>; 7899def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7900 (LD1Rv4h GPR64sp:$Rn)>; 7901def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7902 (LD1Rv8h GPR64sp:$Rn)>; 7903def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7904 (LD1Rv2s GPR64sp:$Rn)>; 7905def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7906 (LD1Rv4s GPR64sp:$Rn)>; 
7907def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7908 (LD1Rv2d GPR64sp:$Rn)>; 7909def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7910 (LD1Rv1d GPR64sp:$Rn)>; 7911 7912def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7913 (LD1Rv8b GPR64sp:$Rn)>; 7914def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))), 7915 (LD1Rv16b GPR64sp:$Rn)>; 7916def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7917 (LD1Rv4h GPR64sp:$Rn)>; 7918def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))), 7919 (LD1Rv8h GPR64sp:$Rn)>; 7920def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7921 (LD1Rv2s GPR64sp:$Rn)>; 7922def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))), 7923 (LD1Rv4s GPR64sp:$Rn)>; 7924def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))), 7925 (LD1Rv2d GPR64sp:$Rn)>; 7926 7927// Grab the floating point version too 7928def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7929 (LD1Rv2s GPR64sp:$Rn)>; 7930def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7931 (LD1Rv4s GPR64sp:$Rn)>; 7932def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7933 (LD1Rv2d GPR64sp:$Rn)>; 7934def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7935 (LD1Rv1d GPR64sp:$Rn)>; 7936def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7937 (LD1Rv4h GPR64sp:$Rn)>; 7938def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7939 (LD1Rv8h GPR64sp:$Rn)>; 7940def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7941 (LD1Rv4h GPR64sp:$Rn)>; 7942def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7943 (LD1Rv8h GPR64sp:$Rn)>; 7944 7945class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, 7946 ValueType VTy, ValueType STy, Instruction LD1> 7947 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7948 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7949 (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; 7950 7951def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; 7952def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; 7953def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; 7954def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; 7955def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; 7956def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; 7957def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>; 7958def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>; 7959 7960// Generate LD1 for extload if memory type does not match the 7961// destination type, for example: 7962// 7963// (v4i32 (insert_vector_elt (load anyext from i8) idx)) 7964// 7965// In this case, the index must be adjusted to match LD1 type. 
7966// 7967class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand 7968 VecIndex, ValueType VTy, ValueType STy, 7969 Instruction LD1, SDNodeXForm IdxOp> 7970 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7971 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7972 (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; 7973 7974class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex, 7975 ValueType VTy, ValueType STy, Instruction LD1, 7976 SDNodeXForm IdxOp> 7977 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7978 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7979 (EXTRACT_SUBREG 7980 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7981 (IdxOp VecIndex:$idx), GPR64sp:$Rn), 7982 dsub)>; 7983 7984def VectorIndexStoH : SDNodeXForm<imm, [{ 7985 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7986}]>; 7987def VectorIndexStoB : SDNodeXForm<imm, [{ 7988 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); 7989}]>; 7990def VectorIndexHtoB : SDNodeXForm<imm, [{ 7991 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7992}]>; 7993 7994def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>; 7995def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>; 7996def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>; 7997 7998def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>; 7999def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>; 8000def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>; 8001 8002// Same as above, but the first element is populated using 8003// scalar_to_vector + insert_subvector instead of insert_vector_elt. 
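// For example (illustrative, register numbers arbitrary): with the
// VectorIndexStoB adjustment above,
//   (v4i32 (vector_insert V0, (i32 (extloadi8 [x0])), (i64 1)))
// selects "ld1 { v0.b }[4], [x0]", since byte lane 4 overlays the low byte
// of word lane 1. The scalar_to_vector forms below likewise select a
// lane-0 ld1, e.g. "ld1 { v0.h }[0], [x0]" for
//   (v2i32 (scalar_to_vector (i32 (extloadi16 [x0])))).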
8004let Predicates = [IsNeonAvailable] in { 8005 class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy, 8006 SDPatternOperator ExtLoad, Instruction LD1> 8007 : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), 8008 (ResultTy (EXTRACT_SUBREG 8009 (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; 8010 8011 def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>; 8012 def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>; 8013 def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>; 8014} 8015class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, 8016 ValueType VTy, ValueType STy, Instruction LD1> 8017 : Pat<(vector_insert (VTy VecListOne64:$Rd), 8018 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 8019 (EXTRACT_SUBREG 8020 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 8021 VecIndex:$idx, GPR64sp:$Rn), 8022 dsub)>; 8023 8024def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; 8025def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; 8026def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; 8027def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; 8028def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>; 8029def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>; 8030 8031 8032defm LD1 : SIMDLdSt1SingleAliases<"ld1">; 8033defm LD2 : SIMDLdSt2SingleAliases<"ld2">; 8034defm LD3 : SIMDLdSt3SingleAliases<"ld3">; 8035defm LD4 : SIMDLdSt4SingleAliases<"ld4">; 8036 8037// Stores 8038defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; 8039defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; 8040defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; 8041defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; 8042 8043let AddedComplexity = 19 in 8044class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, 8045 ValueType VTy, ValueType STy, Instruction ST1> 8046 : Pat<(scalar_store 8047 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 8048 GPR64sp:$Rn), 8049 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; 8050 8051def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>; 8052def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; 8053def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; 8054def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; 8055def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; 8056def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; 8057def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>; 8058def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>; 8059 8060let AddedComplexity = 19 in 8061class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, 8062 ValueType VTy, ValueType STy, Instruction ST1> 8063 : Pat<(scalar_store 8064 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 8065 GPR64sp:$Rn), 8066 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 8067 VecIndex:$idx, GPR64sp:$Rn)>; 8068 8069def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; 8070def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; 8071def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; 8072def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; 8073def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>; 8074def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>; 8075 8076multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, 8077 ValueType VTy, 
ValueType STy, Instruction ST1, 8078 int offset> { 8079 def : Pat<(scalar_store 8080 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 8081 GPR64sp:$Rn, offset), 8082 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 8083 VecIndex:$idx, GPR64sp:$Rn, XZR)>; 8084 8085 def : Pat<(scalar_store 8086 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 8087 GPR64sp:$Rn, GPR64:$Rm), 8088 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 8089 VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 8090} 8091 8092defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; 8093defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, 8094 2>; 8095defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; 8096defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; 8097defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; 8098defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; 8099defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>; 8100defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>; 8101 8102multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, 8103 ValueType VTy, ValueType STy, Instruction ST1, 8104 int offset> { 8105 def : Pat<(scalar_store 8106 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 8107 GPR64sp:$Rn, offset), 8108 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; 8109 8110 def : Pat<(scalar_store 8111 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 8112 GPR64sp:$Rn, GPR64:$Rm), 8113 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 8114} 8115 8116defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, 8117 1>; 8118defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, 8119 2>; 8120defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; 8121defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; 8122defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; 8123defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; 8124defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>; 8125defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>; 8126 8127let mayStore = 1, hasSideEffects = 0 in { 8128defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; 8129defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; 8130defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; 8131defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; 8132defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; 8133defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; 8134defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; 8135defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; 8136defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; 8137defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; 8138defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; 8139defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; 8140} 8141 8142defm ST1 : SIMDLdSt1SingleAliases<"st1">; 8143defm ST2 : SIMDLdSt2SingleAliases<"st2">; 8144defm ST3 : SIMDLdSt3SingleAliases<"st3">; 8145defm ST4 : 
SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

let Predicates = [HasAES] in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                        Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                         Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired with
// AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2", int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
}

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
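// For example (illustrative, register numbers arbitrary):
//   (i64 (sext GPR32:$src))   ==> sbfm x0, x0, #0, #31
// which is the instruction behind the "sxtw x0, w0" alias.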
8214def : Pat<(i64 (sext GPR32:$src)), 8215 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 8216def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; 8217def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; 8218def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; 8219def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; 8220def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; 8221def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; 8222def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; 8223 8224def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), 8225 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 8226 (i64 (i32shift_sext_i8 imm0_31:$imm)))>; 8227def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), 8228 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 8229 (i64 (i64shift_sext_i8 imm0_63:$imm)))>; 8230 8231def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), 8232 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 8233 (i64 (i32shift_sext_i16 imm0_31:$imm)))>; 8234def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), 8235 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 8236 (i64 (i64shift_sext_i16 imm0_63:$imm)))>; 8237 8238def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), 8239 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 8240 (i64 (i64shift_a imm0_63:$imm)), 8241 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 8242 8243def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), 8244 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 8245 (i64 (i64shift_a imm0_63:$imm)), 8246 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 8247 8248// sra patterns have an AddedComplexity of 10, so make sure we have a higher 8249// AddedComplexity for the following patterns since we want to match sext + sra 8250// patterns before we attempt to match a single sra node. 8251let AddedComplexity = 20 in { 8252// We support all sext + sra combinations which preserve at least one bit of the 8253// original value which is to be sign extended. E.g. we support shifts up to 8254// bitwidth-1 bits. 8255def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), 8256 (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; 8257def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), 8258 (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; 8259 8260def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), 8261 (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; 8262def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), 8263 (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; 8264 8265def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), 8266 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 8267 (i64 imm0_31:$imm), 31)>; 8268} // AddedComplexity = 20 8269 8270// To truncate, we can simply extract from a subregister. 8271def : Pat<(i32 (trunc GPR64sp:$src)), 8272 (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; 8273 8274// __builtin_trap() uses the BRK instruction on AArch64. 
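// For example (illustrative): __builtin_trap() emits "brk #0x1" and
// __builtin_debugtrap() emits "brk #0xf000". The ubsantrap patterns below
// put the check kind in the low byte and 'U' (0x55) in the second byte,
// so kind 0 emits "brk #0x5500".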
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;

// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2, then shuffle the high part of
// both results together.
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//   store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//   store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = REV v2i32 v1
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16 v3
//   v5 = REV v4i16 v4               (implicit)
//   store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction.
This must be synthesized with an 8362// EXT instruction. 8363// 8364// Most bitconverts require some sort of conversion. The only exceptions are: 8365// a) Identity conversions - vNfX <-> vNiX 8366// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX 8367// 8368 8369// Natural vector casts (64 bit) 8370foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 8371 foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 8372 def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))), 8373 (VT FPR64:$src)>; 8374 8375// Natural vector casts (128 bit) 8376foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 8377 foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 8378 def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))), 8379 (VT FPR128:$src)>; 8380 8381let Predicates = [IsLE] in { 8382def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8383def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8384def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8385def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8386def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8387def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8388 8389def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 8390 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8391def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 8392 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8393def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 8394 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8395def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 8396 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8397def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 8398 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8399def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 8400 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8401def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 8402 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8403} 8404let Predicates = [IsBE] in { 8405def : Pat<(v8i8 (bitconvert GPR64:$Xn)), 8406 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8407def : Pat<(v4i16 (bitconvert GPR64:$Xn)), 8408 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8409def : Pat<(v2i32 (bitconvert GPR64:$Xn)), 8410 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8411def : Pat<(v4f16 (bitconvert GPR64:$Xn)), 8412 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8413def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), 8414 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8415def : Pat<(v2f32 (bitconvert GPR64:$Xn)), 8416 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8417 8418def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 8419 (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8420def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 8421 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8422def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 8423 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8424def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 8425 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8426def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 8427 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8428def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 8429 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8430} 8431def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8432def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8433def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), 8434 (COPY_TO_REGCLASS V64:$Vn, 
                              GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
          (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
          (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
          (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
          (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
          (v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
          (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
          (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
          (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

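// Note on the 128-bit types that follow: big-endian needs more than a single
// REV there. Most patterns reverse the elements within each 64-bit half
// (REV64) and then swap the two halves with EXT #8; types whose elements are
// already 64 bits wide need only the EXT.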
let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                          (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
          (v2f64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
          (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
          (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
          (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
          (v2i64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
          (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
          (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
          (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
          (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
          (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;

def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
          (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
          (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;

def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
          (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                            (REV64v8i16 FPR128:$src),
                            (i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
          (v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;

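// v16i8 is the byte-vector case: the REV granule on big-endian tracks the
// element size of the other type (REV16 for v8i16, REV32 for v4i32/v4f32,
// REV64 for v2i64/v2f64, plus the EXT #8 half-swap for f128).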
let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
          (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
}

def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
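// For example, summing both lanes of a v2i64 this way selects to roughly:
//   addp d0, v0.2d
//   fmov x0, d0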
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Prefer using the bottom lanes of addp Rn, Rn over
// addp extractlow(Rn), extracthigh(Rn)
def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;

def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
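// For example, a v2i64 nontemporal store of q0 becomes roughly (address in x0):
//   mov  d1, v0.d[1]
//   stnp d0, d1, [x0, #offset]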
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so they carry no encoding information or similar.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call limited to only use registers (x16 and x17) which are
  // allowed to tail-call a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
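// In the end these pseudos are expanded into an ordinary branch: broadly, a
// direct `b` for TCRETURNdi and an indirect `br` for the register forms.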
def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
// the rest of the compiler, especially the register allocator and copy
// propagation, to reason about, so it is preferred where possible.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}
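// For instance, (i32 (extractelt (v4i32 V128:$V), 0)) is just an ssub
// subregister copy; it only materializes as `fmov w0, s0` if the value has
// to cross into a general-purpose register.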
// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
              (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;

// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;

// vaddv_[su]32 is special: it lowers to ADDP Vd.2S, Vn.2S, Vm.2S with
// Vn == Vm, and the result is Vd.s[0].
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
               SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;

class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;

class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;

class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}

// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}

let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:  Store64BV<0b011, "st64bv">;
  def ST64BV0: Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}

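// Memory Operations (FEAT_MOPS). Each architected copy/set sequence is a
// prologue/main/epilogue triple, which is what the P/M/E suffixes below
// denote (e.g. cpyfp/cpyfm/cpyfe together implement one forward memcpy).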
let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;

    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;

    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;

    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
  }
}
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}

// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;

// MOPS operations always contain three 4-byte instructions
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                     (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                     [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}

//-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns

let Predicates = [HasPAuth] in {
def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
          (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;
def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
}

//-----------------------------------------------------------------------------

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb", (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb", (HINT 22), 1>;
}

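// Of the two aliases above, the priority-0 one is parse-only (so `clrbhb`
// always assembles to HINT #22), while the predicated priority-1 one also
// selects `clrbhb` for printing when FEAT_CLRBHB is present.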
//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW : ReadCheckWriteCompareAndSwap;

defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;

def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg); when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}

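// The LSE128 operations above take two 64-bit registers holding the halves of
// a 128-bit value, giving atomic 128-bit swap (SWPP), bit clear (LDCLRP) and
// bit set (LDSETP), each with the usual acquire/release variants.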
//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                              size   opc   opc2
  def STILPWpre:   BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:   BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:      BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:      BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpost: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpost: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  //                                          size   opc
  def STLRWpre:   BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:   BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpost: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
  def LDAPRXpost: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}

let Predicates = [HasRCPC3, HasNEON] in {
  //                                              size   opc regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  //                               L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt : SystemPXtI<0, "sysp">;

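  // A separate def (rather than an optional operand on SYSPxt) appears to be
  // needed so the all-ones Rt field can be pinned to XZR; see the decoder
  // note and the operand-less alias below.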
  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // A custom decoder is needed here: tablegen infers only 4 operand fields
    // for this instruction and autogenerates a decoder (decodeToMCInst) that
    // builds an MC representation with 4 operands, while the printer
    // (AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc) is
    // derived from the asm template and so expects 5 operands, the extra one
    // being the XZR pair. Adding a bits<5> xzr_pair field would not help:
    // without a way to constrain it to 0b11111 here, it would overlap with
    // the main SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//          31         22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS     1101010101  | 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR     1101010101  | 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc).
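// Usage sketch (assuming a system register that is 128 bits wide under
// FEAT_D128, such as TTBR0_EL1):
//   mrrs x0, x1, TTBR0_EL1    // 128-bit read into the pair x0, x1
//   msrr TTBR0_EL1, x0, x1    // 128-bit write from the pair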

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}

//===----------------------------===//
// 2023 Architecture Extensions:
//===----------------------------===//

let Predicates = [HasFP8] in {
  defm F1CVTL  : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
  defm F2CVTL  : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
  defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
  defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
  defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
  defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
  defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]

let Predicates = [HasFAMINMAX] in {
  defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
  defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
} // End let Predicates = [HasFAMINMAX]

let Predicates = [HasFP8FMA] in {
  defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
  defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
  defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
  defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
  defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
  defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;

  defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
  defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
  defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
  defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
  defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
  defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
} // End let Predicates = [HasFP8FMA]

let Predicates = [HasFP8DOT2] in {
  defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
  defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
} // End let Predicates = [HasFP8DOT2]

let Predicates = [HasFP8DOT4] in {
  defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
  defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
} // End let Predicates = [HasFP8DOT4]

//===----------------------------------------------------------------------===//
// Checked Pointer Arithmetic (FEAT_CPA)
//===----------------------------------------------------------------------===//
let Predicates = [HasCPA] in {
  // Scalar add/subtract
  defm ADDPT : AddSubCPA<0, "addpt">;
  defm SUBPT : AddSubCPA<1, "subpt">;

  // Scalar multiply-add/subtract
  def MADDPT : MulAccumCPA<0, "maddpt">;
  def MSUBPT : MulAccumCPA<1, "msubpt">;
}

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"