//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
// Each HasXYZ pairs a C++ Subtarget query (used by ISel pattern predication)
// with an AssemblerPredicate (used by the asm parser for diagnostics).
def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
               AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
                 AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3 : Predicate<"Subtarget->hasEL3()">,
             AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH : Predicate<"Subtarget->hasVH()">,
            AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR : Predicate<"Subtarget->hasLOR()">,
             AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
               AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
                 AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;

def HasJS : Predicate<"Subtarget->hasJS()">,
            AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
               AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
                   AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV : Predicate<"Subtarget->hasNV()">,
            AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
              AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT : Predicate<"Subtarget->hasDIT()">,
             AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
                   AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM : Predicate<"Subtarget->hasAM()">,
            AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
              AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
                 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
               AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
                   AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
                 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
              AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
             AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
             AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
             AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
              AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
             AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
             AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
             AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
                  AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
             AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                           "fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
             AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
              AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->hasSVE2p1()">,
                AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
                 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
                 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
                  AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
                     AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16 : Predicate<"Subtarget->hasB16B16()">,
                AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME : Predicate<"Subtarget->hasSME()">,
             AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">,
                   AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">,
                   AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEFA64 : Predicate<"Subtarget->hasSMEFA64()">,
                 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">,
                   AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2 : Predicate<"Subtarget->hasSME2()">,
              AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
                AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFPMR : Predicate<"Subtarget->hasFPMR()">,
              AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
def HasFP8 : Predicate<"Subtarget->hasFP8()">,
             AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
                  AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
                AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
// The streaming variants are also satisfied by the non-streaming feature
// together with SVE2.
def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
                               "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
                     AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
                                                (all_of FeatureSVE2, FeatureFP8FMA)),
                                               "ssve-fp8fma or (sve2 and fp8fma)">;
def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
                 AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
                                "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
                      AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
                                                 (all_of FeatureSVE2, FeatureFP8DOT2)),
                                                "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
                 AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
                      AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                                 (all_of FeatureSVE2, FeatureFP8DOT4)),
                                                "ssve-fp8dot4 or (sve2 and fp8dot4)">;
def HasLUT : Predicate<"Subtarget->hasLUT()">,
             AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
def HasSME_LUTv2 : Predicate<"Subtarget->hasSME_LUTv2()">,
                   AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
def HasSMEF8F16 : Predicate<"Subtarget->hasSMEF8F16()">,
                  AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
def HasSMEF8F32 : Predicate<"Subtarget->hasSMEF8F32()">,
                  AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVEorSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                                "sve2 or sme">;
def HasSVE2orSME2
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                                "sve2 or sme2">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
// A subset of NEON instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                "neon or sme">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
              AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
                 AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
                   AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
            AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
                 AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
              AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
             AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
             AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
             AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
             AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
              AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
            AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
              AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
              AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
                 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
             AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
              AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
                AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
                  AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE : Predicate<"Subtarget->hasITE()">,
             AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE : Predicate<"Subtarget->hasTHE()">,
             AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
               AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
                AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
              AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK : Predicate<"Subtarget->hasCHK()">,
             AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS : Predicate<"Subtarget->hasGCS()">,
             AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA : Predicate<"Subtarget->hasCPA()">,
             AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

// Never true at ISel time; only used by the assembler for negative-immediate
// aliasing (disabled by FeatureNoNegativeImmediates).
def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
                                    [SDTCisFP<0>,
                                     SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                   SDTCisSameAs<0,1>,
                                                   SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp x0, :tlsdesc:var
//  ldr x1, [x0, #:tlsdesc_lo12:var]
//  add x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr x1

// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// number of operands (the variable)
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

// Classifies a gather/scatter fragment by index signedness and whether the
// index is pre-scaled by the element size.
multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}

defm nonext_masked_gather : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8 : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8 : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16 : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32 : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8 : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;

// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                   SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call : SDNode<"AArch64ISD::CALL",
                         SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                          SDNPVariadic]>;

def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                  SDNPVariadic]>;

def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
                                        SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                         SDNPVariadic]>;

def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                           [SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                        [SDNPHasChain]>;
def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                         [SDNPHasChain]>;
def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                        [SDNPHasChain]>;
def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                         [SDNPHasChain]>;


def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn>;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                [SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
                               [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                (AArch64fcmp node:$lhs, node:$rhs)]>;

def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
// CMTST is expressed as NOT(CMEQz(AND(LHS, RHS))).
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq :
SDNode<"AArch64ISD::TLSDESC_CALLSEQ", 773 SDT_AArch64TLSDescCallSeq, 774 [SDNPInGlue, SDNPOutGlue, SDNPHasChain, 775 SDNPVariadic]>; 776 777 778def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", 779 SDT_AArch64WrapperLarge>; 780 781def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; 782 783def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 784 SDTCisSameAs<1, 2>]>; 785def AArch64pmull : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull, 786 [SDNPCommutative]>; 787def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull, 788 [SDNPCommutative]>; 789def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull, 790 [SDNPCommutative]>; 791 792def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; 793def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; 794def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; 795def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; 796 797def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>; 798def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>; 799 800def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; 801def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; 802def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; 803def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; 804def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; 805def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; 806def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>; 807def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>; 808 809def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs), 810 [(abdu node:$lhs, node:$rhs), 811 (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; 812def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), 813 [(abds node:$lhs, node:$rhs), 814 (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; 815 816def AArch64addp_n : 
SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>; 817def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>; 818def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>; 819def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm), 820 [(AArch64addp_n node:$Rn, node:$Rm), 821 (int_aarch64_neon_addp node:$Rn, node:$Rm)]>; 822def AArch64uaddlp : PatFrags<(ops node:$src), 823 [(AArch64uaddlp_n node:$src), 824 (int_aarch64_neon_uaddlp node:$src)]>; 825def AArch64saddlp : PatFrags<(ops node:$src), 826 [(AArch64saddlp_n node:$src), 827 (int_aarch64_neon_saddlp node:$src)]>; 828def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm), 829 [(AArch64addp_n node:$Rn, node:$Rm), 830 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>; 831def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>; 832def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS), 833 [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)), 834 (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>; 835def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm), 836 [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)), 837 (int_aarch64_neon_facge node:$Rn, node:$Rm)]>; 838def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm), 839 [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)), 840 (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>; 841 842def AArch64fmaxnmv : PatFrags<(ops node:$Rn), 843 [(vecreduce_fmax node:$Rn), 844 (int_aarch64_neon_fmaxnmv node:$Rn)]>; 845def AArch64fminnmv : PatFrags<(ops node:$Rn), 846 [(vecreduce_fmin node:$Rn), 847 (int_aarch64_neon_fminnmv node:$Rn)]>; 848def AArch64fmaxv : PatFrags<(ops node:$Rn), 849 [(vecreduce_fmaximum node:$Rn), 850 (int_aarch64_neon_fmaxv node:$Rn)]>; 851def AArch64fminv : PatFrags<(ops node:$Rn), 852 [(vecreduce_fminimum node:$Rn), 853 (int_aarch64_neon_fminv node:$Rn)]>; 854 855def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; 856def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, 
SDNPMayStore, SDNPMemOperand]>; 857def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 858def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 859def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 860 861def SDT_AArch64unpk : SDTypeProfile<1, 1, [ 862 SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0> 863]>; 864def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>; 865def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>; 866def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>; 867def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; 868 869def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 870def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 871def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 872def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 873def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 874def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 875 876def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; 877 878def AArch64probedalloca 879 : SDNode<"AArch64ISD::PROBED_ALLOCA", 880 SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, 881 [SDNPHasChain, SDNPMayStore]>; 882 883def AArch64mrs : SDNode<"AArch64ISD::MRS", 884 SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>, 885 [SDNPHasChain, SDNPOutGlue]>; 886 887def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>; 888def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>; 889def 
AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i), 890 [(AArch64rshrnb node:$rs, node:$i), 891 (int_aarch64_sve_rshrnb node:$rs, node:$i)]>; 892 893def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1, 894 [SDTCisInt<0>, SDTCisVec<1>]>, []>; 895 896// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands 897// have no common bits. 898def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), 899 [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{ 900 if (N->getOpcode() == ISD::ADD) 901 return true; 902 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)); 903}]> { 904 let GISelPredicateCode = [{ 905 // Only handle G_ADD for now. FIXME. build capability to compute whether 906 // operands of G_OR have common bits set or not. 907 return MI.getOpcode() == TargetOpcode::G_ADD; 908 }]; 909} 910 911// Match mul with enough sign-bits. Can be reduced to a smaller mul operand. 912def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{ 913 return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 && 914 CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32; 915}]>; 916 917//===----------------------------------------------------------------------===// 918 919//===----------------------------------------------------------------------===// 920 921// AArch64 Instruction Predicate Definitions. 922// We could compute these on a per-module basis but doing so requires accessing 923// the Function object through the <Target>Subtarget and objections were raised 924// to that (see post-commit review comments for r301750). 925let RecomputePerFunction = 1 in { 926 def ForCodeSize : Predicate<"shouldOptForSize(MF)">; 927 def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">; 928 // Avoid generating STRQro if it is slow, unless we're optimizing for code size. 
929 def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">; 930 931 def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; 932 def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; 933 934 def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>; 935 def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>; 936 // Toggles patterns which aren't beneficial in GlobalISel when we aren't 937 // optimizing. This allows us to selectively use patterns without impacting 938 // SelectionDAG's behaviour. 939 // FIXME: One day there will probably be a nicer way to check for this, but 940 // today is not that day. 941 def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; 942} 943 944include "AArch64InstrFormats.td" 945include "SVEInstrFormats.td" 946include "SMEInstrFormats.td" 947 948//===----------------------------------------------------------------------===// 949 950//===----------------------------------------------------------------------===// 951// Miscellaneous instructions. 952//===----------------------------------------------------------------------===// 953 954let hasSideEffects = 1, isCodeGenOnly = 1 in { 955let Defs = [SP], Uses = [SP] in { 956// We set Sched to empty list because we expect these instructions to simply get 957// removed in most cases. 
958def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), 959 [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, 960 Sched<[]>; 961def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), 962 [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, 963 Sched<[]>; 964 965} 966 967let Defs = [SP, NZCV], Uses = [SP] in { 968// Probed stack allocation of a constant size, used in function prologues when 969// stack-clash protection is enabled. 970def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch), 971 (ins i64imm:$stacksize, i64imm:$fixed_offset, 972 i64imm:$scalable_offset), 973 []>, 974 Sched<[]>; 975 976// Probed stack allocation of a variable size, used in function prologues when 977// stack-clash protection is enabled. 978def PROBED_STACKALLOC_VAR : Pseudo<(outs), 979 (ins GPR64sp:$target), 980 []>, 981 Sched<[]>; 982 983// Probed stack allocations of a variable size, used for allocas of unknown size 984// when stack-clash protection is enabled. 985let usesCustomInserter = 1 in 986def PROBED_STACKALLOC_DYN : Pseudo<(outs), 987 (ins GPR64common:$target), 988 [(AArch64probedalloca GPR64common:$target)]>, 989 Sched<[]>; 990 991} // Defs = [SP, NZCV], Uses = [SP] in 992} // hasSideEffects = 1, isCodeGenOnly = 1 993 994let isReMaterializable = 1, isCodeGenOnly = 1 in { 995// FIXME: The following pseudo instructions are only needed because remat 996// cannot handle multiple instructions. When that changes, they can be 997// removed, along with the AArch64Wrapper node. 998 999let AddedComplexity = 10 in 1000def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), 1001 [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, 1002 Sched<[WriteLDAdr]>; 1003 1004// The MOVaddr instruction should match only when the add is not folded 1005// into a load or store address. 
1006def MOVaddr 1007 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 1008 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), 1009 tglobaladdr:$low))]>, 1010 Sched<[WriteAdrAdr]>; 1011def MOVaddrJT 1012 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 1013 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), 1014 tjumptable:$low))]>, 1015 Sched<[WriteAdrAdr]>; 1016def MOVaddrCP 1017 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 1018 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), 1019 tconstpool:$low))]>, 1020 Sched<[WriteAdrAdr]>; 1021def MOVaddrBA 1022 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 1023 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), 1024 tblockaddress:$low))]>, 1025 Sched<[WriteAdrAdr]>; 1026def MOVaddrTLS 1027 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 1028 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), 1029 tglobaltlsaddr:$low))]>, 1030 Sched<[WriteAdrAdr]>; 1031def MOVaddrEXT 1032 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 1033 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), 1034 texternalsym:$low))]>, 1035 Sched<[WriteAdrAdr]>; 1036// Normally AArch64addlow either gets folded into a following ldr/str, 1037// or together with an adrp into MOVaddr above. For cases with TLS, it 1038// might appear without either of them, so allow lowering it into a plain 1039// add. 
1040def ADDlowTLS 1041 : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low), 1042 [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src, 1043 tglobaltlsaddr:$low))]>, 1044 Sched<[WriteAdr]>; 1045 1046} // isReMaterializable, isCodeGenOnly 1047 1048def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), 1049 (LOADgot tglobaltlsaddr:$addr)>; 1050 1051def : Pat<(AArch64LOADgot texternalsym:$addr), 1052 (LOADgot texternalsym:$addr)>; 1053 1054def : Pat<(AArch64LOADgot tconstpool:$addr), 1055 (LOADgot tconstpool:$addr)>; 1056 1057// In general these get lowered into a sequence of three 4-byte instructions. 1058// 32-bit jump table destination is actually only 2 instructions since we can 1059// use the table itself as a PC-relative base. But optimization occurs after 1060// branch relaxation so be pessimistic. 1061let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch", 1062 isNotDuplicable = 1 in { 1063def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), 1064 (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, 1065 Sched<[]>; 1066def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), 1067 (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, 1068 Sched<[]>; 1069def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), 1070 (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, 1071 Sched<[]>; 1072} 1073 1074// Space-consuming pseudo to aid testing of placement and reachability 1075// algorithms. Immediate operand is the number of bytes this "instruction" 1076// occupies; register operands can be used to enforce dependency and constrain 1077// the scheduler. 
1078let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in 1079def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn), 1080 [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>, 1081 Sched<[]>; 1082 1083let hasSideEffects = 1, isCodeGenOnly = 1 in { 1084 def SpeculationSafeValueX 1085 : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>; 1086 def SpeculationSafeValueW 1087 : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>; 1088} 1089 1090// SpeculationBarrierEndBB must only be used after an unconditional control 1091// flow, i.e. after a terminator for which isBarrier is True. 1092let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { 1093 // This gets lowered to a pair of 4-byte instructions. 1094 let Size = 8 in 1095 def SpeculationBarrierISBDSBEndBB 1096 : Pseudo<(outs), (ins), []>, Sched<[]>; 1097 // This gets lowered to a 4-byte instruction. 1098 let Size = 4 in 1099 def SpeculationBarrierSBEndBB 1100 : Pseudo<(outs), (ins), []>, Sched<[]>; 1101} 1102 1103//===----------------------------------------------------------------------===// 1104// System instructions. 1105//===----------------------------------------------------------------------===// 1106 1107def HINT : HintI<"hint">; 1108def : InstAlias<"nop", (HINT 0b000)>; 1109def : InstAlias<"yield",(HINT 0b001)>; 1110def : InstAlias<"wfe", (HINT 0b010)>; 1111def : InstAlias<"wfi", (HINT 0b011)>; 1112def : InstAlias<"sev", (HINT 0b100)>; 1113def : InstAlias<"sevl", (HINT 0b101)>; 1114def : InstAlias<"dgh", (HINT 0b110)>; 1115def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; 1116def : InstAlias<"csdb", (HINT 20)>; 1117// In order to be able to write readable assembly, LLVM should accept assembly 1118// inputs that use Branch Target Indentification mnemonics, even with BTI disabled. 1119// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1120// should not emit these mnemonics unless BTI is enabled. 
1121def : InstAlias<"bti", (HINT 32), 0>; 1122def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; 1123def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; 1124def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; 1125 1126// v8.2a Statistical Profiling extension 1127def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; 1128 1129// As far as LLVM is concerned this writes to the system's exclusive monitors. 1130let mayLoad = 1, mayStore = 1 in 1131def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">; 1132 1133// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot 1134// model patterns with sufficiently fine granularity. 1135let mayLoad = ?, mayStore = ? in { 1136def DMB : CRmSystemI<barrier_op, 0b101, "dmb", 1137 [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>; 1138 1139def DSB : CRmSystemI<barrier_op, 0b100, "dsb", 1140 [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>; 1141 1142def ISB : CRmSystemI<barrier_op, 0b110, "isb", 1143 [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>; 1144 1145def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> { 1146 let CRm = 0b0010; 1147 let Inst{12} = 0; 1148 let Predicates = [HasTRACEV8_4]; 1149} 1150 1151def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> { 1152 let CRm{1-0} = 0b11; 1153 let Inst{9-8} = 0b10; 1154 let Predicates = [HasXS]; 1155} 1156 1157let Predicates = [HasWFxT] in { 1158def WFET : RegInputSystemI<0b0000, 0b000, "wfet">; 1159def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">; 1160} 1161 1162// Branch Record Buffer two-word mnemonic instructions 1163class BRBEI<bits<3> op2, string keyword> 1164 : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> { 1165 let Inst{31-8} = 0b110101010000100101110010; 1166 let Inst{7-5} = op2; 1167 let Predicates = [HasBRBE]; 1168} 1169def BRB_IALL: BRBEI<0b100, "\tiall">; 1170def BRB_INJ: BRBEI<0b101, "\tinj">; 1171 1172} 1173 1174// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ 1175def : TokenAlias<"INJ", "inj">; 
def : TokenAlias<"IALL", "iall">;


// ARMv9.4-A Guarded Control Stack
// GCS no-operand system instructions (push/pop exception-state forms).
class GCSNoOp<bits<3> op2, string mnemonic>
    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
  let Inst{20-8} = 0b0100001110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}
def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;

// GCS system instructions taking a GPR64 source ($Rt is an input).
class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
              list<dag> pattern = []>
    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;

// GCS system instructions producing a GPR64 result ($Rt is an output).
class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
               list<dag> pattern = []>
    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent

// gcsb dsync: parse-only alias without GCS; preferred alias with GCS.
def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;

def : TokenAlias<"DSYNC", "dsync">;

// CHKFEAT: hint #40; reads and writes the feature mask in X16.
let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
  def CHKFEAT   : SystemNoOperands<0b000, "hint\t#40">;
}
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;

// GCS store instructions (plain and unprivileged forms).
class GCSSt<string mnemonic, bits<3> op>
    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
  bits<5> Rt;
  bits<5> Rn;
  let Inst{31-15} = 0b11011001000111110;
  let Inst{14-12} = op;
  let Inst{11-10} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{4-0} = Rt;
  let Predicates = [HasGCS];
}
def GCSSTR  : GCSSt<"gcsstr",  0b000>;
def GCSSTTR : GCSSt<"gcssttr", 0b001>;


// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN       : SIMD_BFCVTN;
def BFCVTN2      : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                            (v4bf16 V64:$Rm),
                            (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorSME, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;

// sudot lane has a pattern where usdot is expected (there is no sudot).
// The second operand is used in the dup operation to repeat the indexed
// element.
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
                         string rhs_kind, RegisterOperand RegType,
                         ValueType AccumType, ValueType InputType>
    : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind,
                                    lhs_kind, rhs_kind, RegType, AccumType,
                                    InputType, null_frag> {
  let Pattern = [(set (AccumType RegType:$dst),
                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
                                 (InputType (bitconvert (AccumType
                                    (AArch64duplane32 (v4i32 V128:$Rm),
                                        VectorIndexS:$idx)))),
                                 (InputType RegType:$Rn))))];
}

multiclass SIMDSUDOTIndex {
  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
}

defm SUDOTlane : SIMDSUDOTIndex;

}

// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}

// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
def XAR       : CryptoRRRi6<"xar">;

class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : SHA3_pattern<SHA512H,   int_aarch64_crypto_sha512h,   v2i64>;
def : SHA3_pattern<SHA512H2,  int_aarch64_crypto_sha512h2,  v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;

// EOR3 also matches the generic xor-of-xor form.
class EOR3_pattern<ValueType VecTy>
  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;

// BCAX also matches the generic xor(a, and(b, not(c))) form.
class BCAX_pattern<ValueType VecTy>
  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : BCAX_pattern<v16i8>;
def : BCAX_pattern<v8i16>;
def : BCAX_pattern<v4i32>;
def : BCAX_pattern<v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;

def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;

// RAX1 also matches xor with an explicit rotate-left-by-one of the operand.
def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

} // HasSHA3

let Predicates = [HasSM4] in {
def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;

def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;

class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;

class SM4_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;

def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;

def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
def : SM4_pattern<SM4E,      int_aarch64_crypto_sm4e>;
} // HasSM4

let Predicates = [HasRCPC] in {
  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
}

// v8.3a complex add and multiply-accumulate. No predicate here, that is done
// inside the multiclass as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
                                               "fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
                                           "fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;

// FCADD intrinsic selection. The trailing (i32 N) operand is the rotation
// selector: 0 for the rot90 intrinsic, 1 for rot270.
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}

let Predicates = [HasComplxNum, HasNEON] in {
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
  // 128-bit f32/f64 forms share a pattern; the instruction name is built by
  // string concatenation from the value type.
  foreach Ty = [v4f32, v2f64] in {
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
  }
}

// FCMLA intrinsic selection: rot0/rot90/rot180/rot270 map to rotation
// selector immediates 0/1/2/3 respectively.
multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
}

// Indexed (by-lane) FCMLA selection; RHSDup supplies the matched
// duplicated-lane RHS, which binds $Rm and $idx used in the output dag.
multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
}


let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  defm : FCMLA_PATS<v4f16, V64>;
  defm : FCMLA_PATS<v8f16, V128>;

  // The lane duplication is written as an integer dup seen through a
  // bitconvert, since each complex f16 pair is one 32-bit lane.
  defm : FCMLA_LANE_PATS<v4f16, V64,
                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
  defm : FCMLA_LANE_PATS<v8f16, V128,
                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
}
let Predicates = [HasComplxNum, HasNEON] in {
  defm : FCMLA_PATS<v2f32, V64>;
  defm : FCMLA_PATS<v4f32, V128>;
  defm : FCMLA_PATS<v2f64, V128>;

  defm : FCMLA_LANE_PATS<v4f32, V128,
                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
}

// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
// important for compatibility with other assemblers (e.g. GAS) when building
// software compatible with both CPUs that do or don't implement PA.
let Uses = [LR], Defs = [LR] in {
  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
  let isAuthenticated = 1 in {
    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
  }
}
let Uses = [LR, SP], Defs = [LR] in {
  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
  let isAuthenticated = 1 in {
    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
  }
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
  let isAuthenticated = 1 in {
    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
  }
}

let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
}

// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use pointer authentication mnemonics, even with PA disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless PA is enabled.
// Parse-only aliases (final `0` = never used for printing) so PA mnemonics
// are always accepted, even without the pauth feature.
def : InstAlias<"paciaz", (PACIAZ), 0>;
def : InstAlias<"pacibz", (PACIBZ), 0>;
def : InstAlias<"autiaz", (AUTIAZ), 0>;
def : InstAlias<"autibz", (AUTIBZ), 0>;
def : InstAlias<"paciasp", (PACIASP), 0>;
def : InstAlias<"pacibsp", (PACIBSP), 0>;
def : InstAlias<"autiasp", (AUTIASP), 0>;
def : InstAlias<"autibsp", (AUTIBSP), 0>;
def : InstAlias<"pacia1716", (PACIA1716), 0>;
def : InstAlias<"pacib1716", (PACIB1716), 0>;
def : InstAlias<"autia1716", (AUTIA1716), 0>;
def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;

// Pseudos

let Uses = [LR, SP], Defs = [LR] in {
// Insertion point of LR signing code.
def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
// Insertion point of LR authentication code.
// The RET terminator of the containing machine basic block may be replaced
// with a combined RETA(A|B) instruction when rewriting this Pseudo.
def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
}

// These pointer authentication instructions require armv8.3a
let Predicates = [HasPAuth] in {

  // When PA is enabled, a better mnemonic should be emitted.
  def : InstAlias<"paciaz", (PACIAZ), 1>;
  def : InstAlias<"pacibz", (PACIBZ), 1>;
  def : InstAlias<"autiaz", (AUTIAZ), 1>;
  def : InstAlias<"autibz", (AUTIBZ), 1>;
  def : InstAlias<"paciasp", (PACIASP), 1>;
  def : InstAlias<"pacibsp", (PACIBSP), 1>;
  def : InstAlias<"autiasp", (AUTIASP), 1>;
  def : InstAlias<"autibsp", (AUTIBSP), 1>;
  def : InstAlias<"pacia1716", (PACIA1716), 1>;
  def : InstAlias<"pacib1716", (PACIB1716), 1>;
  def : InstAlias<"autia1716", (AUTIA1716), 1>;
  def : InstAlias<"autib1716", (AUTIB1716), 1>;
  def : InstAlias<"xpaclri", (XPACLRI), 1>;

  // One-operand sign/authenticate instructions for both keys (A/B) and both
  // key types (instruction/data), plus the zero-modifier ("z") variants.
  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
                      SDPatternOperator op> {
    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>;
    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>;
    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>;
    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>;
    def IZA  : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>;
    def DZA  : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>;
    def IZB  : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>;
    def DZB  : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
  }

  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;

  // int_ptrauth_strip's key operand: 0/1 (I-keys) -> XPACI, 2/3 (D-keys) -> XPACD.
  def XPACI : ClearAuth<0, "xpaci">;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;

  def XPACD : ClearAuth<1, "xpacd">;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
  def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;

  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;

  // Combined Instructions
  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAA  : AuthBranchTwoOperands<0, 0, "braa">;
    def BRAB  : AuthBranchTwoOperands<0, 1, "brab">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAA  : AuthBranchTwoOperands<1, 0, "blraa">;
    def BLRAB  : AuthBranchTwoOperands<1, 1, "blrab">;
  }

  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
    def BRAAZ  : AuthOneOperand<0b000, 0, "braaz">;
    def BRABZ  : AuthOneOperand<0b000, 1, "brabz">;
  }
  let isCall = 1, Defs = [LR], Uses = [SP] in {
    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
  }

  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
    def RETAA  : AuthReturn<0b010, 0, "retaa">;
    def RETAB  : AuthReturn<0b010, 1, "retab">;
    def ERETAA : AuthReturn<0b100, 0, "eretaa">;
    def ERETAB : AuthReturn<0b100, 1, "eretab">;
  }

  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;

}

// v9.5-A pointer authentication extensions

// Always accept "pacm" as an alias for "hint #39", but don't emit it when
// disassembling if we don't have the pauth-lr feature.
1662let CRm = 0b0100 in { 1663 def PACM : SystemNoOperands<0b111, "hint\t#39">; 1664} 1665def : InstAlias<"pacm", (PACM), 0>; 1666 1667let Predicates = [HasPAuthLR] in { 1668 let Defs = [LR], Uses = [LR, SP] in { 1669 // opcode2, opcode, asm 1670 def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">; 1671 def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">; 1672 def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">; 1673 def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">; 1674 // opc, asm 1675 def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">; 1676 def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">; 1677 // opcode2, opcode, asm 1678 def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">; 1679 def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">; 1680 // opcode2, opcode, asm 1681 def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">; 1682 def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">; 1683 def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">; 1684 def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">; 1685 } 1686 1687 let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1688 // opc, op2, asm 1689 def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">; 1690 def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">; 1691 // op3, asm 1692 def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">; 1693 def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">; 1694 } 1695 def : InstAlias<"pacm", (PACM), 1>; 1696} 1697 1698 1699// v8.3a floating point conversion for javascript 1700let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in 1701def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, 1702 "fjcvtzs", 1703 [(set GPR32:$Rd, 1704 (int_aarch64_fjcvtzs FPR64:$Rn))]> { 1705 let Inst{31} = 0; 1706} // HasJS, HasFPARMv8 1707 1708// v8.4 Flag manipulation instructions 
1709let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { 1710def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { 1711 let Inst{20-5} = 0b0000001000000000; 1712} 1713def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; 1714def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; 1715def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", 1716 "{\t$Rn, $imm, $mask}">; 1717} // HasFlagM 1718 1719// v8.5 flag manipulation instructions 1720let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { 1721 1722def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { 1723 let Inst{18-16} = 0b000; 1724 let Inst{11-8} = 0b0000; 1725 let Unpredictable{11-8} = 0b1111; 1726 let Inst{7-5} = 0b001; 1727} 1728 1729def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { 1730 let Inst{18-16} = 0b000; 1731 let Inst{11-8} = 0b0000; 1732 let Unpredictable{11-8} = 0b1111; 1733 let Inst{7-5} = 0b010; 1734} 1735} // HasAltNZCV 1736 1737 1738// Armv8.5-A speculation barrier 1739def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { 1740 let Inst{20-5} = 0b0001100110000111; 1741 let Unpredictable{11-8} = 0b1111; 1742 let Predicates = [HasSB]; 1743 let hasSideEffects = 1; 1744} 1745 1746def : InstAlias<"clrex", (CLREX 0xf)>; 1747def : InstAlias<"isb", (ISB 0xf)>; 1748def : InstAlias<"ssbb", (DSB 0)>; 1749def : InstAlias<"pssbb", (DSB 4)>; 1750def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; 1751 1752def MRS : MRSI; 1753def MSR : MSRI; 1754def MSRpstateImm1 : MSRpstateImm0_1; 1755def MSRpstateImm4 : MSRpstateImm0_15; 1756 1757def : Pat<(AArch64mrs imm:$id), 1758 (MRS imm:$id)>; 1759 1760// The thread pointer (on Linux, at least, where this has been implemented) is 1761// TPIDR_EL0. 
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;

// This gets lowered into a 24-byte instruction sequence
let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
def KCFI_CHECK : Pseudo<
  (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
}

// HWASan check pseudos; the expanded sequence clobbers X16/X17/LR/NZCV and
// reads the shadow base from a fixed register (X9 or X20 below).
let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo<
  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
  Sched<[]>;
}

let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
  Sched<[]>;
}

// The virtual cycle counter register is CNTVCT_EL0.
def : Pat<(readcyclecounter), (MRS 0xdf02)>;

// FPCR register
// Pseudos so the FPCR dependence is modelled explicitly; they expand to plain
// MRS/MSR with the FPCR system-register encoding (0xda20).
let Uses = [FPCR] in
def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins),
                      [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
               PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
               Sched<[WriteSys]>;
let Defs = [FPCR] in
def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val),
                      [(int_aarch64_set_fpcr i64:$val)]>,
               PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
               Sched<[WriteSys]>;

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

// Accept "sys" with the trailing Xt register omitted (defaults to XZR).
def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;


// Transactional Memory Extension.
let Predicates = [HasTME] in {

def TSTART : TMSystemI<0b0000, "tstart",
                      [(set GPR64:$Rt, (int_aarch64_tstart))]>;

def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;

def TCANCEL : TMSystemException<0b011, "tcancel",
                                [(int_aarch64_tcancel timm64_0_65535:$imm)]>;

def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
  let mayLoad = 0;
  let mayStore = 0;
}
} // HasTME

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
1844def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1845def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1846def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1847def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1848 1849def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1850def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1851def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1852def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1853 1854def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; 1855def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; 1856def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; 1857def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; 1858 1859def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1860def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1861 1862def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1863def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1864 1865def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; 1866def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; 1867 1868// Final group of aliases covers true "mov $Rd, $imm" cases. 
// Builds, for one MOVZ/MOVN instruction and one (width, shift) combination:
// an AsmOperandClass whose predicate/render methods are synthesized by name,
// a matching Operand, and the "mov $Rd, $imm" InstAlias that uses them.
multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
// Matches an i64 whose upper 32 bits are zero.
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

// Matches an i64 that fits in a (sign-extended) i32.
def s64imm_32bit : ImmLeaf<i64, [{
  int64_t Imm64 = static_cast<int64_t>(Imm);
  return Imm64 >= std::numeric_limits<int32_t>::min() &&
         Imm64 <= std::numeric_limits<int32_t>::max();
}]>;

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
                   GISDNodeXFormEquiv<trunc_imm>;

let Predicates = [OptimizedGISelOrOtherSelector] in {
// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
// copies.
def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
}

// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
1946def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 1947return CurDAG->getTargetConstant( 1948 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 1949}]>; 1950 1951def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 1952return CurDAG->getTargetConstant( 1953 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 1954}]>; 1955 1956 1957def : Pat<(f32 fpimm:$in), 1958 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; 1959def : Pat<(f64 fpimm:$in), 1960 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; 1961 1962 1963// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 1964// sequences. 1965def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 1966 tglobaladdr:$g1, tglobaladdr:$g0), 1967 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), 1968 tglobaladdr:$g1, 16), 1969 tglobaladdr:$g2, 32), 1970 tglobaladdr:$g3, 48)>; 1971 1972def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 1973 tblockaddress:$g1, tblockaddress:$g0), 1974 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), 1975 tblockaddress:$g1, 16), 1976 tblockaddress:$g2, 32), 1977 tblockaddress:$g3, 48)>; 1978 1979def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 1980 tconstpool:$g1, tconstpool:$g0), 1981 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), 1982 tconstpool:$g1, 16), 1983 tconstpool:$g2, 32), 1984 tconstpool:$g3, 48)>; 1985 1986def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 1987 tjumptable:$g1, tjumptable:$g0), 1988 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), 1989 tjumptable:$g1, 16), 1990 tjumptable:$g2, 32), 1991 tjumptable:$g3, 48)>; 1992 1993 1994//===----------------------------------------------------------------------===// 1995// Arithmetic instructions. 1996//===----------------------------------------------------------------------===// 1997 1998// Add/subtract with carry. 
1999defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 2000defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 2001 2002def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 2003def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 2004def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 2005def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 2006 2007// Add/subtract 2008defm ADD : AddSub<0, "add", "sub", add>; 2009defm SUB : AddSub<1, "sub", "add">; 2010 2011def : InstAlias<"mov $dst, $src", 2012 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 2013def : InstAlias<"mov $dst, $src", 2014 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 2015def : InstAlias<"mov $dst, $src", 2016 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 2017def : InstAlias<"mov $dst, $src", 2018 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 2019 2020defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; 2021defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; 2022 2023def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ 2024 return N->getOpcode() == ISD::CopyFromReg && 2025 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; 2026}]>; 2027 2028// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 
2029def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 2030 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 2031def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 2032 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 2033def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 2034 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 2035def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 2036 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 2037def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 2038 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 2039def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 2040 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 2041let AddedComplexity = 1 in { 2042def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), 2043 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; 2044def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), 2045 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; 2046def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), 2047 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; 2048} 2049 2050// Because of the immediate format for add/sub-imm instructions, the 2051// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 2052// These patterns capture that transformation. 2053let AddedComplexity = 1 in { 2054def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2055 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2056def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2057 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2058def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2059 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2060def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2061 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2062} 2063 2064// Because of the immediate format for add/sub-imm instructions, the 2065// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 2066// These patterns capture that transformation. 
// Flag-setting variants of the negated-immediate transformation above.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

// neg(s) Rd, Rm == sub(s) Rd, zero-register, Rm (optionally shifted).
def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;


// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;

def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;

// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;

// Multiply-add
let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd">;
defm MSUB : MulAccum<1, "msub">;

// mul == madd with a zero-register accumulator; neg(mul) == msub with zero.
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 5

// Widening (32x32->64) multiply-accumulate and the many DAG shapes that
// can be folded into it.
let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

// Constant RHS: materialize the 32-bit-representable constant with MOVi32imm.
def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
                    GPR64:$Ra)),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                    (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

// 64-bit operands already known to be 32-bit-valued (by sign bits).
def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

// NOTE(review): the pattern below continues past the end of this chunk and is
// reproduced verbatim, unmodified.
def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm),
GPR64:$Ra)), 2221 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2222def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), 2223 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2224 2225def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), 2226 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2227def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2228 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2229 2230def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), 2231 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2232def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2233 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2234} // AddedComplexity = 5 2235 2236def : MulAccumWAlias<"mul", MADDWrrr>; 2237def : MulAccumXAlias<"mul", MADDXrrr>; 2238def : MulAccumWAlias<"mneg", MSUBWrrr>; 2239def : MulAccumXAlias<"mneg", MSUBXrrr>; 2240def : WideMulAccumAlias<"smull", SMADDLrrr>; 2241def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 2242def : WideMulAccumAlias<"umull", UMADDLrrr>; 2243def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 2244 2245// Multiply-high 2246def SMULHrr : MulHi<0b010, "smulh", mulhs>; 2247def UMULHrr : MulHi<0b110, "umulh", mulhu>; 2248 2249// CRC32 2250def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 2251def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 2252def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 2253def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 2254 2255def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 2256def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 2257def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 2258def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, 
int_aarch64_crc32cx, "crc32cx">; 2259 2260// v8.1 atomic CAS 2261defm CAS : CompareAndSwap<0, 0, "">; 2262defm CASA : CompareAndSwap<1, 0, "a">; 2263defm CASL : CompareAndSwap<0, 1, "l">; 2264defm CASAL : CompareAndSwap<1, 1, "al">; 2265 2266// v8.1 atomic CASP 2267defm CASP : CompareAndSwapPair<0, 0, "">; 2268defm CASPA : CompareAndSwapPair<1, 0, "a">; 2269defm CASPL : CompareAndSwapPair<0, 1, "l">; 2270defm CASPAL : CompareAndSwapPair<1, 1, "al">; 2271 2272// v8.1 atomic SWP 2273defm SWP : Swap<0, 0, "">; 2274defm SWPA : Swap<1, 0, "a">; 2275defm SWPL : Swap<0, 1, "l">; 2276defm SWPAL : Swap<1, 1, "al">; 2277 2278// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register) 2279defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; 2280defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; 2281defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; 2282defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; 2283 2284defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; 2285defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; 2286defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; 2287defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; 2288 2289defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; 2290defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; 2291defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; 2292defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; 2293 2294defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; 2295defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; 2296defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; 2297defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; 2298 2299defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; 2300defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; 2301defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; 2302defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; 2303 2304defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; 2305defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; 2306defm 
LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; 2307defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; 2308 2309defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; 2310defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; 2311defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; 2312defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; 2313 2314defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; 2315defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; 2316defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; 2317defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; 2318 2319// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" 2320defm : STOPregister<"stadd","LDADD">; // STADDx 2321defm : STOPregister<"stclr","LDCLR">; // STCLRx 2322defm : STOPregister<"steor","LDEOR">; // STEORx 2323defm : STOPregister<"stset","LDSET">; // STSETx 2324defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx 2325defm : STOPregister<"stsmin","LDSMIN">;// STSMINx 2326defm : STOPregister<"stumax","LDUMAX">;// STUMAXx 2327defm : STOPregister<"stumin","LDUMIN">;// STUMINx 2328 2329// v8.5 Memory Tagging Extension 2330let Predicates = [HasMTE] in { 2331 2332def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", 2333 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; 2334 2335def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", 2336 int_aarch64_gmi, GPR64sp>, Sched<[]> { 2337 let isNotDuplicable = 1; 2338} 2339def ADDG : AddSubG<0, "addg", null_frag>; 2340def SUBG : AddSubG<1, "subg", null_frag>; 2341 2342def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; 2343 2344def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; 2345def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ 2346 let Defs = [NZCV]; 2347} 2348 2349def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; 2350 2351def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; 2352 2353def : Pat<(int_aarch64_addg (am_indexedu6s128 
GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), 2354 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; 2355def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2356 (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2357 2358def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; 2359 2360def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", 2361 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; 2362def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", 2363 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; 2364def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", 2365 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { 2366 let Inst{23} = 0; 2367} 2368 2369defm STG : MemTagStore<0b00, "stg">; 2370defm STZG : MemTagStore<0b01, "stzg">; 2371defm ST2G : MemTagStore<0b10, "st2g">; 2372defm STZ2G : MemTagStore<0b11, "stz2g">; 2373 2374def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2375 (STGi $Rn, $Rm, $imm)>; 2376def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2377 (STZGi $Rn, $Rm, $imm)>; 2378def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2379 (ST2Gi $Rn, $Rm, $imm)>; 2380def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2381 (STZ2Gi $Rn, $Rm, $imm)>; 2382 2383defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; 2384def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; 2385def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; 2386 2387def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2388 (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2389 2390def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), 2391 (STGPi $Rt, $Rt2, $Rn, $imm)>; 2392 2393def IRGstack 2394 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, 2395 Sched<[]>; 2396def TAGPstack 2397 : Pseudo<(outs GPR64sp:$Rd), (ins 
GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, 2398 Sched<[]>; 2399 2400// Explicit SP in the first operand prevents ShrinkWrap optimization 2401// from leaving this instruction out of the stack frame. When IRGstack 2402// is transformed into IRG, this operand is replaced with the actual 2403// register / expression for the tagged base pointer of the current function. 2404def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; 2405 2406// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. 2407// $Rn_wback is one past the end of the range. $Rm is the loop counter. 2408let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in { 2409def STGloop_wback 2410 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2411 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2412 Sched<[WriteAdr, WriteST]>; 2413 2414def STZGloop_wback 2415 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2416 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2417 Sched<[WriteAdr, WriteST]>; 2418 2419// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. 2420// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). 2421def STGloop 2422 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2423 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2424 Sched<[WriteAdr, WriteST]>; 2425 2426def STZGloop 2427 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2428 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2429 Sched<[WriteAdr, WriteST]>; 2430} 2431 2432} // Predicates = [HasMTE] 2433 2434//===----------------------------------------------------------------------===// 2435// Logical instructions. 
//===----------------------------------------------------------------------===//

// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND  : LogicalImm<0b00, "and", and, "bic">;
defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR  : LogicalImm<0b01, "orr", or, "orn">;

// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
                                          logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                          logical_imm64:$imm), 0>;


// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND  : LogicalReg<0b00, 0, "and", and>;
defm BIC  : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
defm EON  : LogicalReg<0b10, 1, "eon",
                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
defm ORN  : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR  : LogicalReg<0b01, 0, "orr", or>;

// Register "mov" is ORR from the zero register.
def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;

// "mvn" is ORN from the zero register.
def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;

def : InstAlias<"mvn $Wd, $Wm$sh",
                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;

// "tst" is ANDS writing only the flags (destination is the zero register).
def : InstAlias<"tst $src1, $src2",
                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;

def : InstAlias<"tst $src1, $src2$sh",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;


def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;


//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS  : OneOperandData<0b000101, "cls">;
defm CLZ  : OneOperandData<0b000100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000000, "rbit", bitreverse>;

def REV16Wr : OneWRegData<0b000001, "rev16",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;

// Count trailing zeros is RBIT followed by CLZ.
def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                (i64 1))),
          (CLSXr GPR64:$Rn)>;
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;

// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b000010, "rev", bswap>;
def REVXr   : OneXRegData<0b000011, "rev", bswap>;
def REV32Xr : OneXRegData<0b000010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

// Byte-swap within each halfword of a 64-bit value.
def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
          (REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;

// Immediate rotate is EXTR with both source operands equal.
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

// Immediate transforms mapping a shift amount onto the (immr, imms) encoding
// of the UBFM/SBFM bitfield instructions.
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// Left shift by immediate is UBFM with wrapped-around immediates.
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_b imm0_63:$imm)))>;

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

// asr/sxt* are SBFM aliases.
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

// lsr/uxt* are UBFM aliases.
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

//===----------------------------------------------------------------------===//
// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;

//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

// Select of small constants: 0/1 via CSINC, 0/-1 via CSINV, with the
// condition inverted where the constant sits in the true slot.
def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;

// Combining a value with a conditional 0/1 folds into a single CSINC/CSEL.
def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;

def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;

def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSELWr WZR, GPR32:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;

// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel,
               [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
  let Uses = [X16, SP] in
  def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR X16)>;
} // isCall

// Indirect calls select to BLR, or to the IP-avoiding pseudo when
// straight-line-speculation BLR mitigation is enabled.
def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call_bti GPR64noip:$Rn),
          (BLR_BTI GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes
let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
// BRK is selected from the int_aarch64_break intrinsic with a 16-bit
// immediate (timm32_0_65535).
let isTrap = 1 in {
def BRK : ExceptionGeneration<0b001, 0b00, "brk",
                              [(int_aarch64_break timm32_0_65535:$imm)]>;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;

def UDF : UDFType<0, "udf">;

//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
let Predicates = [HasFPARMv8] in {
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
}

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
let Predicates = [HasFPARMv8] in {
defm LDRB : Load8RO<0b00,  1, 0b01, FPR8Op, "ldr", i8, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
}

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.

// Load a scalar (register-offset addressing) straight into lane 0 of an
// otherwise-undef vector, avoiding a separate GPR->FPR transfer. One pattern
// for the W-register index form, one for the X-register index form.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;


// v1i64 fills the whole D register, so no INSERT_SUBREG is needed.
def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all load 64 bits width whose type is compatible with FPR64
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian, so these LDR-based
  // patterns are restricted to little-endian.
  defm : VecROLoadPat<ro64, v2i32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,   LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;

// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
// Extending loads into a 64-bit result: the 32-bit-result load instruction
// zeroes the upper half, so wrap it in SUBREG_TO_REG.
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}


// extload/zextload -> i32
// The 32-bit-result loads already produce the extended value directly.
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                   [(set (f128 FPR128Op:$Rt),
                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
}

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.

// Unsigned-immediate counterparts of the ScalToVecROLoadPat patterns above:
// load a scalar directly into lane 0 of an otherwise-undef vector.
def : Pat <(v8i8 (scalar_to_vector (i32
                   (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
                   (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
                   (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
                   (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
                   (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
                   (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
                   (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
                   (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
// Single-element vectors are byte-order-agnostic, so no IsLE restriction.
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                    [(set GPR64:$Rt,
                          (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                     uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                          (am_indexed64 GPR64sp:$Rn,
                                                        uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

// True for a global or constant-pool address that is 4-byte aligned (with a
// 4-byte-multiple offset), i.e. one a PC-relative literal load may reference.
def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    // NOTE(review): local variable deliberately(?) shadows the llvm::Align
    // type name; legal C++, but renaming it would aid readability.
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
                        [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
                        [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
let Predicates = [HasFPARMv8] in {
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
                        [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
                        [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
                        [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
}

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
                         [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                          [(set GPR64z:$Rt,
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                          [(set GPR32z:$Rt,
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                          [(set FPR8Op:$Rt,
                                (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                          [(set (f16 FPR16Op:$Rt),
                                (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                          [(set (f32 FPR32Op:$Rt),
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                          [(set (f64 FPR64Op:$Rt),
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                          [(set (f128 FPR128Op:$Rt),
                                (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
}

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
                   [(set GPR32:$Rt,
                         (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
// NOTE(review): the byte load below matches am_unscaled16 rather than the
// expected am_unscaled8 — looks like a copy-paste from LDURHH; confirm
// whether this is intentional before changing (it affects only which
// offsets get the unscaled form during selection, not correctness of the
// emitted instruction).
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
                   [(set GPR32:$Rt,
                         (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;


//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
// Assembler operand class for the LDR->LDUR fallback: a signed 9-bit offset
// that is NOT also representable as the scaled unsigned-immediate form for
// the given access Width (checked by isSImm9OffsetFB<Width> in the parser).
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

// "ldr" with an offset the scaled form cannot encode assembles to LDUR*
// (the trailing 0 marks these aliases as parse-only, never printed).
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                   [(set GPR32:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                   [(set GPR64:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                   [(set GPR64:$Rt,
                         (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// zero and sign extending aliases from generic LDR* mnemonics to LDUR*.
// Extending LDR* mnemonics with unscaled offsets assemble to the LDUR* forms.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// A LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
// load, 0) can use a single load. Each instantiation covers a full vector
// (VT), a half vector (HVT) and an SVE container (SVT), with both the scaled
// and the unscaled addressing modes.
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  // Scaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // Half-vector patterns
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // SVE patterns
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}

defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
                              am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch timm:$Rt,
                                    (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
let Predicates = [HasFPARMv8] in {
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
}

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;


//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;


// Floating-point
let Predicates = [HasFPARMv8] in {
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
}

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  // FIX: the X-register form must render the ro_Xextend128 operand captured by
  // ro_Xindexed128; it previously (incorrectly) reused ro_Wextend128.
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

// Truncating stores of an i64 GPR: store the low 32 bits via the W-register
// sub-register, for both W- and X-register offset addressing.
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all store 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8,   FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all store 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              ValueType SubRegTy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16, store,         v8f16, f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4i32, i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4f32, f32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2i64, i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2f64, f64, i64, dsub, STRDroW, STRDroX>;
}

//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                     [(store GPR64z:$Rt,
                             (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                     [(store GPR32z:$Rt,
                             (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
let Predicates = [HasFPARMv8] in {
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
}

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                      [(truncstorei16 GPR32z:$Rt,
                                      (am_indexed16 GPR64sp:$Rn,
                                                    uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
                      [(truncstorei8 GPR32z:$Rt,
                                     (am_indexed8 GPR64sp:$Rn,
                                                  uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all store 64 bits width whose type is compatible with FPR64
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all store 128 bits width whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
          (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
          (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
// Store lane 0 of a 128-bit vector through the matching sub-register store
// (scaled unsigned-immediate addressing).
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            ValueType SubRegTy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16, store,         v8f16, f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4i32, i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4f32, f32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2i64, i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2f64, f64, i64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                           [(store GPR64z:$Rt,
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                           [(store GPR32z:$Rt,
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
let Predicates = [HasFPARMv8] in {
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                           [(store FPR8Op:$Rt,
                                   (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                           [(store (f16 FPR16Op:$Rt),
                                   (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                           [(store (f32 FPR32Op:$Rt),
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                           [(store (f64 FPR64Op:$Rt),
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                           [(store (f128 FPR128Op:$Rt),
                                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
}
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                            [(truncstorei16 GPR32z:$Rt,
                                            (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                            [(truncstorei8 GPR32z:$Rt,
                                           (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;

// Armv8.4 Weaker Release Consistency enhancements
// LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB    : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH    : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW    : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX    : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB   : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH   : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR    : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW  : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX   : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}

// Match all store 64 bits width whose type is compatible with FPR64
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all store 128 bits width whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  // FIX: an exact duplicate of the v2f64 pattern (it appeared a second time
  // after v2i64) has been removed; each element type is listed exactly once.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
// Unscaled-offset (STUR*) counterpart of VecStoreLane0Pat: a store of lane 0
// of a vector is selected as a plain scalar STUR of the matching sub-register.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             ValueType SubRegTy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
}

// Prefer these lane-0 folds over the generic vector store selections.
let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store, v4f32, f32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store, v2i64, i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store, v2f64, f64, i64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;

//---
// (unscaled immediate, unprivileged)
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;

//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str",  pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str",  pre_store, i64>;
let Predicates = [HasFPARMv8] in {
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,  "str",  pre_store, i8>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str",  pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str",  pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str",  pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
}

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
           simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;

// bf16 scalar pre-indexed store; previously only the post-indexed section
// below had this pattern. STRHpre performs the same 16-bit FP store.
def : Pat<(pre_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpre FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
// Keep the pre-indexed list in sync with the post-indexed one, which also
// covers v4bf16.
def : Pat<(pre_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
// Keep the pre-indexed list in sync with the post-indexed one, which also
// covers v8bf16.
def : Pat<(pre_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
let Predicates = [HasFPARMv8] in {
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,  "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
}

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;

def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//

def LDARW  : LoadAcquire    <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire    <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire    <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire    <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive  <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive  <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive  <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive  <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive  <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive  <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive  <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive  <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease   <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease   <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease   <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease   <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

/*
Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn
of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an
alias for the case of immediate #0. This is because new STLR versions (from
LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not
appropriate anymore (it parses and discards the optional zero). This is not the
case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
and the immediate values are not inside the [] brackets and thus not accepted
by GPR64sp0 parser.
*/
def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW GPR32: $Rt, GPR64sp:$Rn)>;
def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX GPR64: $Rt, GPR64sp:$Rn)>;
def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32: $Rt, GPR64sp:$Rn)>;
def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32: $Rt, GPR64sp:$Rn)>;

def STLXRW : StoreExclusive <0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive <0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive <0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive <0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive <0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive <0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive <0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive <0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

let Predicates = [HasLOR] in {
  // v8.1a "Limited Order Region" extension load-acquire instructions
  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Order Region" extension store-release instructions
  def STLLRW  : StoreRelease  <0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX  : StoreRelease  <0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB  : StoreRelease  <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH  : StoreRelease  <0b01, 1, 0, 0, 0, GPR32, "stllrh">;

  // Aliases for when offset=0
  def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",  (STLLRW GPR32: $Rt, GPR64sp:$Rn)>;
  def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",  (STLLRX GPR64: $Rt, GPR64sp:$Rn)>;
  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32: $Rt, GPR64sp:$Rn)>;
  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32: $Rt, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

// FP -> integer conversions. The NEON intrinsics select a specific rounding
// variant; any_fp_to_sint/any_fp_to_uint use the round-toward-zero forms.
defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
// FCVTZS/FCVTZU additionally get scaled (fixed-point) variants.
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVT instructions saturate when out of range.
// Select saturating FP->int nodes directly to the FCVT* instructions (which
// saturate in hardware, per the comment above). The trailing i32/i64 operand
// of to_int_sat is the saturation width.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // (x * 2^n) followed by a saturating convert folds into the scaled
  // (fixed-point) instruction form.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;

// Same shapes, but driven by the explicit NEON fcvtz[su] intrinsics.
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  }
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

// Fold an explicit rounding (ceil/floor/trunc/round) feeding a conversion
// into the single FCVT variant with that rounding mode built in.
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;



// lround/llround are selected as FCVTAS (round-to-nearest conversion).
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lround f16:$Rn)),
            (FCVTASUWHr f16:$Rn)>;
  def : Pat<(i64 (any_lround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
  def : Pat<(i64 (any_llround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
}
def : Pat<(i32 (any_lround f32:$Rn)),
          (FCVTASUWSr f32:$Rn)>;
def : Pat<(i32 (any_lround f64:$Rn)),
          (FCVTASUWDr f64:$Rn)>;
def : Pat<(i64 (any_lround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_lround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
def : Pat<(i64 (any_llround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_llround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;

//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;

// Fold (int->fp conversion) / (fixed-point scale operand) into the scaled
// S/UCVTF instruction forms.
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1,
    Predicates = [HasFPARMv8] in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
}

// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
      Requires<[HasFullFP16]>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;

// bf16 zero reuses the 16-bit FMOV-zero pseudo.
def : Pat<(bf16 fpimm0),
          (FMOVH0)>;

// Pattern for FP16 and BF16 immediates
def : Pat<(f16 fpimm:$in),
          (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>,
      Requires<[HasFullFP16]>;

def : Pat<(bf16 fpimm:$in),
          (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>,
      Requires<[HasFullFP16]>;

//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FCVT : FPConversion<"fcvt">;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;

defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;

let SchedRW = [WriteFDiv] in {
defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
}

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264

// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
          (FRINT32ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
          (FRINT64ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
          (FRINT32XDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
          (FRINT64XDr FPR64:$Rn)>;

// lrint/llrint are selected as FRINTX (round using current mode, with the
// inexact exception) followed by FCVTZS.
// Emitting strict_lrint as two instructions is valid as any exceptions that
// occur will happen in exactly one of the instructions (e.g. if the input is
// not an integer the inexact exception will happen in the FRINTX but not then
// in the FCVTZS as the output of FRINTX is an integer).
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lrint f16:$Rn)),
            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_lrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_llrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
}
def : Pat<(i32 (any_lrint f32:$Rn)),
          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
def : Pat<(i32 (any_lrint f64:$Rn)),
          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_lrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_lrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_llrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_llrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;

//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
// FNMUL: the TwoOperandFPDataNeg class wraps any_fmul with a negation.
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;

// Select "scalar op lane 0 of a vector" directly as the scalar instruction on
// the vector's low sub-register, avoiding an explicit lane extract.
multiclass FMULScalarFromIndexedLane0Patterns<string inst,
                                              string inst_f16_suffix,
                                              string inst_f32_suffix,
                                              string inst_f64_suffix,
                                              SDPatternOperator OpNode,
                                              list<Predicate> preds = []> {
  let Predicates = !listconcat(preds, [HasFullFP16]) in {
  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f16_suffix)
               FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
  }
  let Predicates = preds in {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f32_suffix)
               FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f64_suffix)
               FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
  }
}

defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
                                          any_fmul>;

// Match reassociated forms of FNMUL.
// (-a) * b is the same as -(a * b): select it as FNMUL.
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
          (FNMULHrr FPR16:$a, FPR16:$b)>,
      Requires<[HasFullFP16]>;
def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
          (FNMULSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
          (FNMULDrr FPR64:$a, FPR64:$b)>;

// Scalarize single-element vector min/max to the scalar FP instructions.
def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.

// Here we handle first -(a + b*c) for FNMADD:

// fma with a negated first operand maps to FMSUB (fneg commutes into either
// multiplicand).
let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now it's time for "(-a) + (-b)*c"

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

// bf16 conditional select reuses the 16-bit FCSEL encoding.
let Predicates = [HasFullFP16] in
def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
let Predicates = [HasFPARMv8] in
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                         (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}

//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//
// These pseudos are pure markers (no outputs, empty patterns); their operands
// record the register numbers / stack offsets for the unwind info emitter.
let isPseudo = 1 in {
  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
}

// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
   let usesCustomInserter = 1 in
   def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
                  Sched<[]>;
}

// Pseudo instructions for homogeneous prolog/epilog
let isPseudo = 1 in {
  // Save CSRs in order, {FPOffset}
  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
  // Restore CSRs in order
  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// Floating point immediate move.
4867//===----------------------------------------------------------------------===// 4868 4869let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 4870defm FMOV : FPMoveImmediate<"fmov">; 4871} 4872 4873let Predicates = [HasFullFP16] in { 4874 def : Pat<(bf16 fpimmbf16:$in), 4875 (FMOVHi (fpimm16XForm bf16:$in))>; 4876} 4877 4878//===----------------------------------------------------------------------===// 4879// Advanced SIMD two vector instructions. 4880//===----------------------------------------------------------------------===// 4881 4882defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", 4883 AArch64uabd>; 4884// Match UABDL in log2-shuffle patterns. 4885def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), 4886 (zext (v8i8 V64:$opB))))), 4887 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4888def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4889 (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), 4890 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4891def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), 4892 (zext (v4i16 V64:$opB))))), 4893 (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; 4894def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), 4895 (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), 4896 (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; 4897def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), 4898 (zext (v2i32 V64:$opB))))), 4899 (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; 4900def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), 4901 (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), 4902 (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; 4903 4904defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; 4905defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; 4906defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; 4907defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; 4908defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; 4909defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", 
AArch64cmgtz>; 4910defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; 4911defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; 4912defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; 4913defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>; 4914 4915def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), 4916 (CMLTv8i8rz V64:$Rn)>; 4917def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), 4918 (CMLTv4i16rz V64:$Rn)>; 4919def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), 4920 (CMLTv2i32rz V64:$Rn)>; 4921def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), 4922 (CMLTv16i8rz V128:$Rn)>; 4923def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), 4924 (CMLTv8i16rz V128:$Rn)>; 4925def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), 4926 (CMLTv4i32rz V128:$Rn)>; 4927def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), 4928 (CMLTv2i64rz V128:$Rn)>; 4929 4930defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 4931defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 4932defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 4933defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 4934defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 4935defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; 4936defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; 4937defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; 4938def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), 4939 (FCVTLv4i16 V64:$Rn)>; 4940def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), 4941 (i64 4)))), 4942 (FCVTLv8i16 V128:$Rn)>; 4943def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), 4944 (FCVTLv2i32 V64:$Rn)>; 4945def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))), 4946 (FCVTLv4i32 V128:$Rn)>; 4947def 
: Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), 4948 (FCVTLv4i16 V64:$Rn)>; 4949def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))), 4950 (FCVTLv8i16 V128:$Rn)>; 4951 4952defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; 4953defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; 4954defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; 4955defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; 4956defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; 4957def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), 4958 (FCVTNv4i16 V128:$Rn)>; 4959def : Pat<(concat_vectors V64:$Rd, 4960 (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), 4961 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4962def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), 4963 (FCVTNv2i32 V128:$Rn)>; 4964def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), 4965 (FCVTNv4i16 V128:$Rn)>; 4966def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), 4967 (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4968def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))), 4969 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4970defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; 4971defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; 4972defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", 4973 int_aarch64_neon_fcvtxn>; 4974defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; 4975defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; 4976 4977// AArch64's FCVT instructions saturate when out of range. 
// Map the saturating fp-to-int ISD nodes onto FCVTZS/FCVTZU, which saturate
// natively (see comment above).
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
  }
  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;

// Explicit intrinsic forms of fcvtzs/fcvtzu.
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264

defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                                UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

// NOT is only defined on byte elements; bitwise complement is lane-size
// agnostic, so reuse the byte forms for the other integer element shapes.
def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

// REV on FP element shapes reuses the same-width integer REV instruction.
def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of the
// definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;

// trunc(umin(X, 255)) -> UQXTRN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTRN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 128)) -> SQXTRN
//  with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
//  with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                                    (v8i16 VImm7F)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                                    (v8i16 VImm80)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                                     (v4i32 VImm7FFF)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                                     (v4i32 VImm8000)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
// not(cmeqz x) == cmtst(x, x) for each integer element shape.
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA    : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
       TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS    : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
       TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

defm FMULX   : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL    : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
defm FRECPS  : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB    : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;

// MLA and MLS are generated in MachineCombine
defm MLA     : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS     : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL     : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL    : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA    : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
       TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD    : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD   : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB   : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP   : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX    : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP   : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN    : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD   : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH: SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL  : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL   : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB   : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD  : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL   : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL    : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB     : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA    : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
       TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD    : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD   : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB   : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP   : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX    : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP   : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN    : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD   : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL  : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL   : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB   : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD  : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
defm URSHL   : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL    : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturate patterns, other than the intrinsics matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;

// BSP on any 64-bit element shape lowers to the byte form.
def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

// Vector "mov" aliases are ORR with both source operands equal.
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

// Two-operand comparison aliases: each expands to the converse comparison
// with the source operands swapped.
def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
// fabs(a - b) is a single FABD.
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
// Rounding doubling multiply-accumulate needs the RDM (v8.1a) feature.
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}

defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
                                          int_aarch64_neon_fmulx,
                                          [HasNEONorSME]>;

// Scalar "less-than" compare aliases: printed/parsed as the opposite
// comparison with the two source operands swapped.
def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                      int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

// Fold a saturating add/sub of a scalar sqdmull result into sqdmlal/sqdmlsl.
def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

// The SIMD scalar ABS is only used when the CSSC scalar ABS is unavailable.
defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
                           UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;

// An arithmetic shift right by 63 of a v1i64 yields all-ones/all-zeros,
// which is exactly a compare-less-than-zero.
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;

// Select the v1i64 forms of the rounding FP -> int conversion intrinsics.
def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

// Scalar and vector forms of the reciprocal-estimate intrinsic / ISD node.
def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

// Reciprocal (Newton) step.
def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Reciprocal exponent.
def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

// Reciprocal square-root estimate intrinsic / ISD node.
def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

// Reciprocal square-root (Newton) step.
def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
// fp -> int -> fp round-trips stay entirely on the FP/SIMD unit.
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
// 8-bits -> float.
// Helper: match a register-offset zero-extending load that feeds uint_to_fp,
// and select the load directly into a SIMD register followed by UCVTF
// (covers both W-register and X-register offset forms).
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bits integer to 32-bits floating point, not possible with
// UCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
} // let Predicates = [HasNEON]

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL  : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                         AArch64sabd>;
defm SADDL  : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
                BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW  : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
                BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL  : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
                TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL  : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
                TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL  : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL  : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW  : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL  : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                             AArch64uabd>;
defm UADDL  : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW  : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL  : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
                TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL  : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
                TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL  : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL  : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW  : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
                BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;

// Additional patterns for [SU]ML[AS]L
// Accumulate into the low half of a widening multiply by performing the full
// 128-bit [SU]ML[AS]L on a widened accumulator and extracting the low half.
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
                                       Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                    V64:$Ra,
                    (v4i16 (extract_subvector
                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                            (i64 0))))),
            (EXTRACT_SUBREG (v8i16 (INST8B
                              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                    V64:$Ra,
                    (v2i32 (extract_subvector
                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                            (i64 0))))),
            (EXTRACT_SUBREG (v4i32 (INST4H
                              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                    V64:$Ra,
                    (v1i64 (extract_subvector
                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                            (i64 0))))),
            (EXTRACT_SUBREG (v2i64 (INST2S
                              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
}

defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;


// Select [SU]ADDL / [SU]SUBL (and the wide "W" variants) when only the low
// half of the extended operands is used.
multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;

  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
}

defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
defm : Neon_addl_extract_patterns<add, sext, "SADD">;
defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.

// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
// Writing the narrowed result into the high half of an existing vector is
// the "2" (second-part) form of the instruction.
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

// Rebase an EXT immediate for the high-half case (lane index 8 onward).
def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

// A concatenation of two truncated vectors is a single UZP1 of the sources.
def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
// These are the same as above, with an optional assertzext node that can be
// generated from fptoi lowering.
def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))),
                                 (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))),
                                 (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))),
                                 (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;

// Concatenating the truncated high halves (logical shift right by half the
// element width, then truncate) of two vectors is a single UZP2.
def : Pat<(v16i8 (concat_vectors
                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

// Single-register table lookup intrinsics map to the One-register forms.
def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD LUT instructions
//----------------------------------------------------------------------------
let Predicates = [HasLUT] in {
  defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
  defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
}

//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------

defm DUP : SIMDScalarDUP<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;

// Only the lower half of the result of the inner FADDP is used in the patterns
// below, so the second operand does not matter. Re-use the first input
// operand, so no additional dependencies need to be introduced.
// Horizontal FP add reductions (vecreduce_fadd): chains of vector FADDP
// followed by a final scalar-pairwise FADDP on the low 64 bits.
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
          (FADDPv2i16p
            (EXTRACT_SUBREG
               (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
               dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
               (FADDPv4f32 V128:$Rn, V128:$Rn),
               dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

// v2i64 add-across-vector: a single scalar-pairwise ADDP, with the result in
// the low 64 bits of an otherwise undefined vector register.
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;

// The faddv intrinsics lower to the same scalar-pairwise FADDP sequences.
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

// Two-element FP min/max reductions are exactly the scalar pairwise ops.
def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP
// instructions
//----------------------------------------------------------------------------

// DUP of a general-purpose register into every lane. The {?,...} groups are
// partially-specified encoding bits for the element-size field.
def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

// DUP of a single vector lane into every lane.
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

// Splat of an FP register: place it in lane 0 of an undefined 128-bit
// register, then DUP that lane.
// NOTE(review): the IMPLICIT_DEF vector types below (e.g. v4i32 in the v2f64
// pattern) appear chosen only to supply a V128-class register — confirm the
// element type there is irrelevant.
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

// FP lane splats reuse the integer lane-indexed DUP instructions of the
// matching element width.
def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
// Lane-index remapping for the DUP-of-narrower-extract trick: a narrow-element
// lane index is 2x/4x/8x the corresponding wide-element index.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// dup(extract) where the splat element is narrower than the source element:
// DUP the remapped narrow lane. The V64-source variant first widens the
// register to V128 via SUBREG_TO_REG.
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                       imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                       imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

// Same trick when the source is an explicit (trunc (extractelt v2i64/v1i64)).
multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

// Fold a following sext/sext_inreg of a lane extract into SMOV.
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
// NOTE(review): the next pattern is a verbatim repeat of the i32
// SMOVvi16to32 pattern two definitions above; kept as-is.
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

// Same folds when type legalization interposed an anyext to i64.
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                        VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                        VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

// 64-bit masked extracts: UMOV zeroes the upper bits, so the 32-bit result is
// simply placed in the low half of a 64-bit register.
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                      VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                      VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

// scalar_to_vector of a GPR: move it into an FPR32 and place it in the low
// 32 bits; lanes other than 0 are undefined, so this suffices for sub-word
// element types too.
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;

// NOTE(review): the next four patterns repeat the f16/bf16 scalar_to_vector
// patterns defined above verbatim; kept as-is.
def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

// 64-bit vector insert: widen to the 128-bit form, use the lane INS, then
// narrow back with EXTRACT_SUBREG.
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

// Inserting FP +0.0 uses the GPR-source INS with the zero register.
def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
// NOTE(review): the result here names the index operand as VectorIndexS:$imm
// while the source captures it as VectorIndexD:$imm; the $imm name binding is
// what ties them together — confirm the class mismatch is intentional.
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

// 64-bit integer vector inserts from a GPR: widen, INS from GPR, narrow.
def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/dev parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

// insert(extract(...)) becomes a single lane-to-lane INS; V64 operands are
// widened to V128 first, and a V64 destination is narrowed at the end.
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16,  v4f16,  f16,  INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32,  v2f32,  f32,  INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64,  v1f64,  f64,  INSvi64lane>;

defm : Neon_INS_elt_pattern<v16i8,  v8i8,   i32,  INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16,  v4i16,  i32,  INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32,  v2i32,  i32,  INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64,  v1i64,  i64,  INSvi64lane>;

// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (EXTRACT_SUBREG
            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
            dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;

// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
// Lane 0 needs no INS at all: the FP value is already the low subregister.
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, dsub)>;

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
// Lane-0 FP extracts are plain subregister reads.
def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;


// Any-lane FP extracts use the scalar lane-indexed DUP.
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
// INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;

// Fold an add-across-vector of a pairwise-widening add into a single
// widening add-across-vector (Opc names the ADDLV instruction family).
multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x))) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;

// Pattern is used for GlobalISel
multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;

  // Patterns for addp(addlp(x))) ==> addlv
  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
}

defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;

// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
            dsub))>;

def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;

def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;

// Widening add-across-vector nodes that produce a vector result: the scalar
// result of the ADDLV instruction lands in the low subregister.
multiclass SIMDAcrossLaneLongReductionIntrinsic<string Opc, SDPatternOperator addlv> {
  def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i8v") V64:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i16v") V64:$Rn), ssub))>;

  def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v16i8v") V128:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i16v") V128:$Rn), ssub))>;

  def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))),
            (v2i64 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i32v") V128:$Rn), dsub))>;
}

defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>;
defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>;

// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;


// If none did, fallback to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
                 (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
                 (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;

}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it as smov already
// performed it
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
            maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
            maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            ssub))>;
}

// For vecreduce_add, used by GlobalISel not SDAG
// vecreduce_add on sub-word element types returns the element type directly
// (i8/i16), which GlobalISel places in an FPR, so the ADDV result register is
// used as-is; the v2i32 case has no ADDV form and uses a pairwise ADDP instead.
def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
          (i8 (ADDVv8i8v V64:$Rn))>;
def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
          (i8 (ADDVv16i8v V128:$Rn))>;
def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
          (i16 (ADDVv4i16v V64:$Rn))>;
def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
          (i16 (ADDVv8i16v V128:$Rn))>;
def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
          (i32 (ADDVv4i32v V128:$Rn))>;
def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
          (i64 (ADDPv2i64p V128:$Rn))>;

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

// Min/max across lanes: the two-element 32-bit forms likewise have no
// across-lanes instruction and fall back to the pairwise variants.
defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;

// For vecreduce_{opc} used by GlobalISel, not SDAG at the moment
// because GlobalISel allows us to specify the return register to be a FPR
multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
                                                SDPatternOperator opNode> {
def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>;

def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>;

def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>;

def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;

def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
}

// For v2i32 source type, the pairwise instruction can be used instead
defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;

// Widening (long) across-lanes reductions, signed: the v8i8/v16i8 forms
// produce a 16-bit result that is read back sign-extended via SMOVvi16to32;
// the wider forms read the 32/64-bit result directly through ssub/dsub.
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              (i64 0)))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
            (i64 0)))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
            ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
            ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
            dsub))>;
}

// Widening (long) across-lanes reductions, unsigned: high bits are already
// zero, so every form is a plain sub-register read — no SMOV needed.
multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              ssub))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
            ssub))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
            ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
            ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
            dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
6984def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), 6985 (i64 (EXTRACT_SUBREG 6986 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6987 (SADDLPv2i32_v1i64 V64:$Rn), dsub), 6988 dsub))>; 6989// The vaddlv_u32 intrinsic gets mapped to UADDLP. 6990def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), 6991 (i64 (EXTRACT_SUBREG 6992 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6993 (UADDLPv2i32_v1i64 V64:$Rn), dsub), 6994 dsub))>; 6995 6996//------------------------------------------------------------------------------ 6997// AdvSIMD modified immediate instructions 6998//------------------------------------------------------------------------------ 6999 7000// AdvSIMD BIC 7001defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; 7002// AdvSIMD ORR 7003defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; 7004 7005def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 7006def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 7007def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 7008def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 7009 7010def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 7011def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 7012def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 7013def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 7014 7015def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 7016def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 7017def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 7018def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 7019 7020def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 7021def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 
7022def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 7023def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 7024 7025// AdvSIMD FMOV 7026def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, 7027 "fmov", ".2d", 7028 [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 7029def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, 7030 "fmov", ".2s", 7031 [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 7032def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, 7033 "fmov", ".4s", 7034 [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 7035let Predicates = [HasNEON, HasFullFP16] in { 7036def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, 7037 "fmov", ".4h", 7038 [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 7039def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, 7040 "fmov", ".8h", 7041 [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 7042} // Predicates = [HasNEON, HasFullFP16] 7043 7044// AdvSIMD MOVI 7045 7046// EDIT byte mask: scalar 7047let isReMaterializable = 1, isAsCheapAsAMove = 1 in 7048def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", 7049 [(set FPR64:$Rd, simdimmtype10:$imm8)]>; 7050// The movi_edit node has the immediate value already encoded, so we use 7051// a plain imm0_255 here. 
7052def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), 7053 (MOVID imm0_255:$shift)>; 7054 7055// EDIT byte mask: 2d 7056 7057// The movi_edit node has the immediate value already encoded, so we use 7058// a plain imm0_255 in the pattern 7059let isReMaterializable = 1, isAsCheapAsAMove = 1 in 7060def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, 7061 simdimmtype10, 7062 "movi", ".2d", 7063 [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; 7064 7065def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7066def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7067def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7068def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7069def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7070def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7071def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7072def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7073 7074def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7075def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7076def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7077def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7078 7079// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the 7080// extract is free and this gives better MachineCSE results. 
// 64-bit integer all-zeros/all-ones come from the 128-bit MOVI.2d plus a free
// dsub extract (better for MachineCSE); 64-bit FP vectors use scalar MOVID.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

let Predicates = [HasNEON] in {
  // Using the MOVI to materialize fp constants.
  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
                                       (i32 24)),
                            ssub)>;
}

// Aliases with the shift amount omitted (defaults to 0), both syntaxes.
def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
                                                "movi", ".8b",
                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                                "movi", ".16b",
                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

// Aliases with the shift amount omitted (defaults to 0), both syntaxes.
def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;

// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;

// Patterns that fold an fneg occurring on the multiplicand (before the lane
// dup/extract) into an indexed FMLS.
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  // NOTE(review): the source matches VectorIndexD:$idx but the result uses
  // VectorIndexS:$idx; both resolve to the same immediate node — confirm intent.
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                             VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;

// fmul by a dup'd scalar -> indexed fmul with the scalar inserted at lane 0.
def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                     int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;

// Generated by MachineCombine
defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
7336defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>; 7337 7338// A scalar sqdmull with the second operand being a vector lane can be 7339// handled directly with the indexed instruction encoding. 7340def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 7341 (vector_extract (v4i32 V128:$Vm), 7342 VectorIndexS:$idx)), 7343 (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; 7344 7345//---------------------------------------------------------------------------- 7346// AdvSIMD scalar shift instructions 7347//---------------------------------------------------------------------------- 7348defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; 7349defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; 7350defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; 7351defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; 7352// Codegen patterns for the above. We don't put these directly on the 7353// instructions because TableGen's type inference can't handle the truth. 7354// Having the same base pattern for fp <--> int totally freaks it out. 
7355def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), 7356 (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; 7357def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), 7358 (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; 7359def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), 7360 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 7361def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), 7362 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 7363def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), 7364 vecshiftR64:$imm)), 7365 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 7366def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), 7367 vecshiftR64:$imm)), 7368 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 7369def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), 7370 (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 7371def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 7372 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7373def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), 7374 vecshiftR64:$imm)), 7375 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7376def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 7377 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7378def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), 7379 vecshiftR64:$imm)), 7380 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7381def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), 7382 (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 7383 7384// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported. 
// FP16 fixed-point converts: i16 is not a legal scalar type, so the integer
// input arrives widened in a 32/64-bit FPR and is narrowed to the h
// sub-register before the convert; results are widened back the same way.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
                   (and FPR32:$Rn, (i32 65535)),
                   vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
              (i32 (IMPLICIT_DEF)),
              (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
              hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
              (i64 (IMPLICIT_DEF)),
              (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
              hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
              (i32 (IMPLICIT_DEF)),
              (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
              hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
              (i64 (IMPLICIT_DEF)),
              (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
              hsub))>;
// FP16 absolute-compare intrinsics: the h-register result is widened to i32
// via INSERT_SUBREG for the same no-legal-i16 reason as above.
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
              (i32 (IMPLICIT_DEF)),
              (FACGE16 FPR16:$Rn, FPR16:$Rm),
              hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
              (i32 (IMPLICIT_DEF)),
              (FACGT16 FPR16:$Rn, FPR16:$Rm),
              hsub))>;

// Scalar shift-by-immediate instructions.
defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
                                     int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
                                     int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
                                     int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
                                     int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
                                     int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
                                     int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vlshr node:$MHS, node:$RHS))>>;

//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                 int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;

// X << 1 ==> X + X
class SHLToADDPat<ValueType ty, RegisterClass regtype>
  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
        (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;

def : SHLToADDPat<v16i8, FPR128>;
def : SHLToADDPat<v8i16, FPR128>;
def : SHLToADDPat<v4i32, FPR128>;
def : SHLToADDPat<v2i64, FPR128>;
def : SHLToADDPat<v8i8, FPR64>;
def : SHLToADDPat<v4i16, FPR64>;
def : SHLToADDPat<v2i32, FPR64>;

defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                      (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                         int_aarch64_neon_sqrshrn>;
defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                         int_aarch64_neon_sqrshrun>;
defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                         int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                         int_aarch64_neon_sqshrun>;
defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                      (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
                 TriOpFrag<(add node:$LHS,
                                (AArch64srshri node:$MHS, node:$RHS))> >;
defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
                 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
                 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                                    int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                         int_aarch64_neon_uqrshrn>;
defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                         int_aarch64_neon_uqshrn>;
defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                 TriOpFrag<(add node:$LHS,
                                (AArch64urshri node:$MHS, node:$RHS))> >;
defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
                 BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;

// RADDHN patterns for when RSHRN shifts by half the size of the vector element
// (rounding add: the +0x80/+0x8000/+0x80000000 bias before the shift is the
// rounding term, so the whole expression is RADDHN with a zero second operand).
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;

// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF),
V64:$Vd, dsub), V128:$Vn, 7576 (v8i16 (MOVIv2d_ns (i32 0))))>; 7577def : Pat<(v8i16 (concat_vectors 7578 (v4i16 V64:$Vd), 7579 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))), 7580 (RADDHNv4i32_v8i16 7581 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 7582 (v4i32 (MOVIv2d_ns (i32 0))))>; 7583def : Pat<(v4i32 (concat_vectors 7584 (v2i32 V64:$Vd), 7585 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))), 7586 (RADDHNv2i64_v4i32 7587 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 7588 (v2i64 (MOVIv2d_ns (i32 0))))>; 7589 7590// SHRN patterns for when a logical right shift was used instead of arithmetic 7591// (the immediate guarantees no sign bits actually end up in the result so it 7592// doesn't matter). 7593def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), 7594 (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; 7595def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), 7596 (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; 7597def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), 7598 (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; 7599 7600def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), 7601 (trunc (AArch64vlshr (v8i16 V128:$Rn), 7602 vecshiftR16Narrow:$imm)))), 7603 (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 7604 V128:$Rn, vecshiftR16Narrow:$imm)>; 7605def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), 7606 (trunc (AArch64vlshr (v4i32 V128:$Rn), 7607 vecshiftR32Narrow:$imm)))), 7608 (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 7609 V128:$Rn, vecshiftR32Narrow:$imm)>; 7610def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), 7611 (trunc (AArch64vlshr (v2i64 V128:$Rn), 7612 vecshiftR64Narrow:$imm)))), 7613 (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 7614 V128:$Rn, vecshiftR32Narrow:$imm)>; 7615 7616// Vector sign and zero extensions are implemented with SSHLL and USSHLL. 
7617// Anyexts are implemented as zexts. 7618def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; 7619def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7620def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7621def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; 7622def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7623def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7624def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; 7625def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7626def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7627// Also match an extend from the upper half of a 128 bit source register. 7628def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7629 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7630def : Pat<(v8i16 (zext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7631 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7632def : Pat<(v8i16 (sext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7633 (SSHLLv16i8_shift V128:$Rn, (i32 0))>; 7634def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7635 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7636def : Pat<(v4i32 (zext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7637 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7638def : Pat<(v4i32 (sext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7639 (SSHLLv8i16_shift V128:$Rn, (i32 0))>; 7640def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7641 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7642def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7643 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7644def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7645 (SSHLLv4i32_shift V128:$Rn, (i32 0))>; 7646 7647// Vector shift sxtl 
aliases 7648def : InstAlias<"sxtl.8h $dst, $src1", 7649 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7650def : InstAlias<"sxtl $dst.8h, $src1.8b", 7651 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7652def : InstAlias<"sxtl.4s $dst, $src1", 7653 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7654def : InstAlias<"sxtl $dst.4s, $src1.4h", 7655 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7656def : InstAlias<"sxtl.2d $dst, $src1", 7657 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7658def : InstAlias<"sxtl $dst.2d, $src1.2s", 7659 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7660 7661// Vector shift sxtl2 aliases 7662def : InstAlias<"sxtl2.8h $dst, $src1", 7663 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7664def : InstAlias<"sxtl2 $dst.8h, $src1.16b", 7665 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7666def : InstAlias<"sxtl2.4s $dst, $src1", 7667 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7668def : InstAlias<"sxtl2 $dst.4s, $src1.8h", 7669 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7670def : InstAlias<"sxtl2.2d $dst, $src1", 7671 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7672def : InstAlias<"sxtl2 $dst.2d, $src1.4s", 7673 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7674 7675// Vector shift uxtl aliases 7676def : InstAlias<"uxtl.8h $dst, $src1", 7677 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7678def : InstAlias<"uxtl $dst.8h, $src1.8b", 7679 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7680def : InstAlias<"uxtl.4s $dst, $src1", 7681 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7682def : InstAlias<"uxtl $dst.4s, $src1.4h", 7683 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7684def : InstAlias<"uxtl.2d $dst, $src1", 7685 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7686def : InstAlias<"uxtl $dst.2d, $src1.2s", 7687 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7688 7689// Vector shift uxtl2 aliases 7690def : InstAlias<"uxtl2.8h $dst, $src1", 7691 (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7692def : InstAlias<"uxtl2 $dst.8h, $src1.16b", 7693 (USHLLv16i8_shift 
V128:$dst, V128:$src1, 0)>; 7694def : InstAlias<"uxtl2.4s $dst, $src1", 7695 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7696def : InstAlias<"uxtl2 $dst.4s, $src1.8h", 7697 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7698def : InstAlias<"uxtl2.2d $dst, $src1", 7699 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7700def : InstAlias<"uxtl2 $dst.2d, $src1.4s", 7701 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7702 7703// If an integer is about to be converted to a floating point value, 7704// just load it on the floating point unit. 7705// These patterns are more complex because floating point loads do not 7706// support sign extension. 7707// The sign extension has to be explicitly added and is only supported for 7708// one step: byte-to-half, half-to-word, word-to-doubleword. 7709// SCVTF GPR -> FPR is 9 cycles. 7710// SCVTF FPR -> FPR is 4 cyclces. 7711// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. 7712// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR 7713// and still being faster. 7714// However, this is not good for code size. 7715// 8-bits -> float. 2 sizes step-up. 
// Fold a sign-extending i8 load feeding sint_to_fp: perform the load on the
// FP unit, sign-extend with two SSHLL lengthening steps (b->h, then h->s)
// and convert with the cheaper FPR->FPR form of SCVTF (see the cycle counts
// in the comment above). `addrmode` is the address pattern to match and
// `INST` the corresponding FPR load, inserted into bsub of an undef 64-bit
// register.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                          (SSHLLv4i16_shift
                            (f64
                              (EXTRACT_SUBREG
                                (SSHLLv8i8_shift
                                  (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                 INST,
                                                 bsub),
                                  0),
                                dsub)),
                            0),
                          ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

// One instantiation per supported addressing mode: register-offset (W and X
// extends), scaled unsigned immediate, and unscaled signed immediate.
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bits -> float. 1 size step-up.
// Same scheme as above, but only one SSHLL lengthening step (h->s) is needed.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                          (SSHLLv4i16_shift
                            (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                           INST,
                                           hsub),
                            0),
                          ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bits to 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bits integer to 32-bits floating point, not possible with
// SCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size step.
// Same FPR-side scheme as the f32 helpers above: load on the FP unit,
// lengthen twice with SSHLL (h->s, then s->d) and convert with the
// FPR->FPR form of SCVTF.
// NOTE(review): the guarding predicate is the CVTF32-named one even though
// this pattern produces f64 — presumably one tuning flag covers both paths;
// confirm against the predicate's definition.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                           (SSHLLv2i32_shift
                             (f64
                               (EXTRACT_SUBREG
                                 (SSHLLv4i16_shift
                                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                  INST,
                                                  hsub),
                                   0),
                                 dsub)),
                             0),
                           dsub)))>,
     Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

// Instantiations for register-offset (W/X), scaled-immediate and
// unscaled-immediate addressing.
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits -> double. 1 size step-up.
7796class SExtLoadi32CVTf64Pat<dag addrmode, dag INST> 7797 : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), 7798 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7799 (SSHLLv2i32_shift 7800 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7801 INST, 7802 ssub), 7803 0), 7804 dsub)))>, 7805 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7806 7807def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), 7808 (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; 7809def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), 7810 (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; 7811def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), 7812 (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; 7813def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), 7814 (LDURSi GPR64sp:$Rn, simm9:$offset)>; 7815 7816// 64-bits -> double are handled in target specific dag combine: 7817// performIntToFpCombine. 7818 7819 7820//---------------------------------------------------------------------------- 7821// AdvSIMD Load-Store Structure 7822//---------------------------------------------------------------------------- 7823defm LD1 : SIMDLd1Multiple<"ld1">; 7824defm LD2 : SIMDLd2Multiple<"ld2">; 7825defm LD3 : SIMDLd3Multiple<"ld3">; 7826defm LD4 : SIMDLd4Multiple<"ld4">; 7827 7828defm ST1 : SIMDSt1Multiple<"st1">; 7829defm ST2 : SIMDSt2Multiple<"st2">; 7830defm ST3 : SIMDSt3Multiple<"st3">; 7831defm ST4 : SIMDSt4Multiple<"st4">; 7832 7833class Ld1Pat<ValueType ty, Instruction INST> 7834 : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; 7835 7836def : Ld1Pat<v16i8, LD1Onev16b>; 7837def : Ld1Pat<v8i16, LD1Onev8h>; 7838def : Ld1Pat<v4i32, LD1Onev4s>; 7839def : Ld1Pat<v2i64, LD1Onev2d>; 7840def : Ld1Pat<v8i8, LD1Onev8b>; 7841def : Ld1Pat<v4i16, LD1Onev4h>; 7842def : Ld1Pat<v2i32, LD1Onev2s>; 7843def : Ld1Pat<v1i64, LD1Onev1d>; 7844 7845class St1Pat<ValueType ty, Instruction INST> 7846 : Pat<(store ty:$Vt, GPR64sp:$Rn), 7847 (INST 
ty:$Vt, GPR64sp:$Rn)>; 7848 7849def : St1Pat<v16i8, ST1Onev16b>; 7850def : St1Pat<v8i16, ST1Onev8h>; 7851def : St1Pat<v4i32, ST1Onev4s>; 7852def : St1Pat<v2i64, ST1Onev2d>; 7853def : St1Pat<v8i8, ST1Onev8b>; 7854def : St1Pat<v4i16, ST1Onev4h>; 7855def : St1Pat<v2i32, ST1Onev2s>; 7856def : St1Pat<v1i64, ST1Onev1d>; 7857 7858//--- 7859// Single-element 7860//--- 7861 7862defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; 7863defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; 7864defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; 7865defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; 7866let mayLoad = 1, hasSideEffects = 0 in { 7867defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; 7868defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; 7869defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; 7870defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; 7871defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; 7872defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; 7873defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; 7874defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; 7875defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; 7876defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; 7877defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; 7878defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; 7879defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; 7880defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; 7881defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; 7882defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; 7883} 7884 7885def : Pat<(v8i8 (AArch64dup (i32 
(extloadi8 GPR64sp:$Rn)))), 7886 (LD1Rv8b GPR64sp:$Rn)>; 7887def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7888 (LD1Rv16b GPR64sp:$Rn)>; 7889def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7890 (LD1Rv4h GPR64sp:$Rn)>; 7891def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7892 (LD1Rv8h GPR64sp:$Rn)>; 7893def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7894 (LD1Rv2s GPR64sp:$Rn)>; 7895def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7896 (LD1Rv4s GPR64sp:$Rn)>; 7897def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7898 (LD1Rv2d GPR64sp:$Rn)>; 7899def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7900 (LD1Rv1d GPR64sp:$Rn)>; 7901 7902def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7903 (LD1Rv8b GPR64sp:$Rn)>; 7904def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))), 7905 (LD1Rv16b GPR64sp:$Rn)>; 7906def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7907 (LD1Rv4h GPR64sp:$Rn)>; 7908def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))), 7909 (LD1Rv8h GPR64sp:$Rn)>; 7910def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7911 (LD1Rv2s GPR64sp:$Rn)>; 7912def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))), 7913 (LD1Rv4s GPR64sp:$Rn)>; 7914def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))), 7915 (LD1Rv2d GPR64sp:$Rn)>; 7916 7917// Grab the floating point version too 7918def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7919 (LD1Rv2s GPR64sp:$Rn)>; 7920def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7921 (LD1Rv4s GPR64sp:$Rn)>; 7922def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7923 (LD1Rv2d GPR64sp:$Rn)>; 7924def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7925 (LD1Rv1d GPR64sp:$Rn)>; 
7926def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7927 (LD1Rv4h GPR64sp:$Rn)>; 7928def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7929 (LD1Rv8h GPR64sp:$Rn)>; 7930def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7931 (LD1Rv4h GPR64sp:$Rn)>; 7932def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7933 (LD1Rv8h GPR64sp:$Rn)>; 7934 7935class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, 7936 ValueType VTy, ValueType STy, Instruction LD1> 7937 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7938 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7939 (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; 7940 7941def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; 7942def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; 7943def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; 7944def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; 7945def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; 7946def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; 7947def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>; 7948def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>; 7949 7950// Generate LD1 for extload if memory type does not match the 7951// destination type, for example: 7952// 7953// (v4i32 (insert_vector_elt (load anyext from i8) idx)) 7954// 7955// In this case, the index must be adjusted to match LD1 type. 
7956// 7957class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand 7958 VecIndex, ValueType VTy, ValueType STy, 7959 Instruction LD1, SDNodeXForm IdxOp> 7960 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7961 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7962 (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; 7963 7964class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex, 7965 ValueType VTy, ValueType STy, Instruction LD1, 7966 SDNodeXForm IdxOp> 7967 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7968 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7969 (EXTRACT_SUBREG 7970 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7971 (IdxOp VecIndex:$idx), GPR64sp:$Rn), 7972 dsub)>; 7973 7974def VectorIndexStoH : SDNodeXForm<imm, [{ 7975 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7976}]>; 7977def VectorIndexStoB : SDNodeXForm<imm, [{ 7978 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); 7979}]>; 7980def VectorIndexHtoB : SDNodeXForm<imm, [{ 7981 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7982}]>; 7983 7984def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>; 7985def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>; 7986def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>; 7987 7988def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>; 7989def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>; 7990def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>; 7991 7992// Same as above, but the first element is populated using 7993// scalar_to_vector + insert_subvector instead of insert_vector_elt. 
7994let Predicates = [IsNeonAvailable] in { 7995 class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy, 7996 SDPatternOperator ExtLoad, Instruction LD1> 7997 : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), 7998 (ResultTy (EXTRACT_SUBREG 7999 (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; 8000 8001 def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>; 8002 def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>; 8003 def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>; 8004} 8005class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, 8006 ValueType VTy, ValueType STy, Instruction LD1> 8007 : Pat<(vector_insert (VTy VecListOne64:$Rd), 8008 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 8009 (EXTRACT_SUBREG 8010 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 8011 VecIndex:$idx, GPR64sp:$Rn), 8012 dsub)>; 8013 8014def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; 8015def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; 8016def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; 8017def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; 8018def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>; 8019def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>; 8020 8021 8022defm LD1 : SIMDLdSt1SingleAliases<"ld1">; 8023defm LD2 : SIMDLdSt2SingleAliases<"ld2">; 8024defm LD3 : SIMDLdSt3SingleAliases<"ld3">; 8025defm LD4 : SIMDLdSt4SingleAliases<"ld4">; 8026 8027// Stores 8028defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; 8029defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; 8030defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; 8031defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; 8032 8033let AddedComplexity = 19 in 8034class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, 8035 ValueType VTy, ValueType STy, Instruction ST1> 8036 : Pat<(scalar_store 8037 (STy (vector_extract 
(VTy VecListOne128:$Vt), VecIndex:$idx)), 8038 GPR64sp:$Rn), 8039 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; 8040 8041def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>; 8042def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; 8043def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; 8044def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; 8045def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; 8046def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; 8047def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>; 8048def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>; 8049 8050let AddedComplexity = 19 in 8051class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, 8052 ValueType VTy, ValueType STy, Instruction ST1> 8053 : Pat<(scalar_store 8054 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 8055 GPR64sp:$Rn), 8056 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 8057 VecIndex:$idx, GPR64sp:$Rn)>; 8058 8059def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; 8060def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; 8061def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; 8062def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; 8063def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>; 8064def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>; 8065 8066multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, 8067 ValueType VTy, ValueType STy, Instruction ST1, 8068 int offset> { 8069 def : Pat<(scalar_store 8070 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 8071 GPR64sp:$Rn, offset), 8072 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 8073 VecIndex:$idx, GPR64sp:$Rn, XZR)>; 8074 8075 def : Pat<(scalar_store 8076 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 8077 GPR64sp:$Rn, GPR64:$Rm), 8078 (ST1 (SUBREG_TO_REG (i32 0), 
VecListOne64:$Vt, dsub), 8079 VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 8080} 8081 8082defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; 8083defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, 8084 2>; 8085defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; 8086defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; 8087defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; 8088defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; 8089defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>; 8090defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>; 8091 8092multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, 8093 ValueType VTy, ValueType STy, Instruction ST1, 8094 int offset> { 8095 def : Pat<(scalar_store 8096 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 8097 GPR64sp:$Rn, offset), 8098 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; 8099 8100 def : Pat<(scalar_store 8101 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 8102 GPR64sp:$Rn, GPR64:$Rm), 8103 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 8104} 8105 8106defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, 8107 1>; 8108defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, 8109 2>; 8110defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; 8111defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; 8112defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; 8113defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; 8114defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>; 8115defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>; 
8116 8117let mayStore = 1, hasSideEffects = 0 in { 8118defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; 8119defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; 8120defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; 8121defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; 8122defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; 8123defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; 8124defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; 8125defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; 8126defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; 8127defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; 8128defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; 8129defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; 8130} 8131 8132defm ST1 : SIMDLdSt1SingleAliases<"st1">; 8133defm ST2 : SIMDLdSt2SingleAliases<"st2">; 8134defm ST3 : SIMDLdSt3SingleAliases<"st3">; 8135defm ST4 : SIMDLdSt4SingleAliases<"st4">; 8136 8137//---------------------------------------------------------------------------- 8138// Crypto extensions 8139//---------------------------------------------------------------------------- 8140 8141let Predicates = [HasAES] in { 8142def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; 8143def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; 8144def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; 8145def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; 8146} 8147 8148// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required 8149// for AES fusion on some CPUs. 
8150let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { 8151def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, 8152 Sched<[WriteVq]>; 8153def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, 8154 Sched<[WriteVq]>; 8155} 8156 8157// Only use constrained versions of AES(I)MC instructions if they are paired with 8158// AESE/AESD. 8159def : Pat<(v16i8 (int_aarch64_crypto_aesmc 8160 (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1), 8161 (v16i8 V128:$src2))))), 8162 (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1), 8163 (v16i8 V128:$src2)))))>, 8164 Requires<[HasFuseAES]>; 8165 8166def : Pat<(v16i8 (int_aarch64_crypto_aesimc 8167 (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1), 8168 (v16i8 V128:$src2))))), 8169 (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1), 8170 (v16i8 V128:$src2)))))>, 8171 Requires<[HasFuseAES]>; 8172 8173let Predicates = [HasSHA2] in { 8174def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; 8175def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; 8176def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; 8177def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; 8178def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; 8179def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; 8180def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; 8181 8182def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; 8183def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; 8184def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; 8185} 8186 8187//---------------------------------------------------------------------------- 8188// Compiler-pseudos 8189//---------------------------------------------------------------------------- 8190// FIXME: Like for X86, these should go in 
their own separate .td file. 8191 8192// For an anyext, we don't care what the high bits are, so we can perform an 8193// INSERT_SUBREF into an IMPLICIT_DEF. 8194def : Pat<(i64 (anyext GPR32:$src)), 8195 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; 8196 8197// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and 8198// then assert the extension has happened. 8199def : Pat<(i64 (zext GPR32:$src)), 8200 (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 8201 8202// To sign extend, we use a signed bitfield move instruction (SBFM) on the 8203// containing super-reg. 8204def : Pat<(i64 (sext GPR32:$src)), 8205 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 8206def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; 8207def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; 8208def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; 8209def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; 8210def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; 8211def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; 8212def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; 8213 8214def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), 8215 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 8216 (i64 (i32shift_sext_i8 imm0_31:$imm)))>; 8217def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), 8218 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 8219 (i64 (i64shift_sext_i8 imm0_63:$imm)))>; 8220 8221def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), 8222 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 8223 (i64 (i32shift_sext_i16 imm0_31:$imm)))>; 8224def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), 8225 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 8226 (i64 (i64shift_sext_i16 imm0_63:$imm)))>; 8227 
// Fold (shl (sext/zext i32 -> i64)) into a single SBFMXri/UBFMXri on the
// 64-bit super-register.
def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a        imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a        imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of the
// original value which is to be sign extended. E.g. we support shifts up to
// bitwidth-1 bits.
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;

def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;

def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20

// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;

// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

// Combine the 8-bit ubsantrap check kind with 'U' (0x55) in the high byte of
// the BRK immediate, so UBSan traps are distinguishable from other BRKs.
def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

// Only check kinds that fit in 8 bits can be encoded this way.
def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;

// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high part of
// both results together.
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
// v0 = load v2i32
// v1 = BITCAST v2i32 v0 to v4i16
// store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
// v0 = load v2i32
// v1 = REV v2i32                  (implicit)
// v2 = BITCAST v2i32 v1 to v4i16
// v3 = REV v4i16 v2               (implicit)
// store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
// v0 = load v2i32
// v1 = REV v2i32                  (implicit)
// v2 = REV v2i32
// v3 = BITCAST v2i32 v2 to v4i16
// v4 = REV v4i16
// v5 = REV v4i16 v4               (implicit)
// store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
// (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
//
// Most bitconverts require some sort of conversion. The only exceptions are:
// a) Identity conversions -  vNfX <-> vNiX
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//

// Natural vector casts (64 bit)
foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
  foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
              (VT FPR64:$src)>;

// Natural vector casts (128 bit)
foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
  foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
              (VT FPR128:$src)>;

// GPR64 <-> 64-bit vector bitconverts: a plain cross-bank copy on little
// endian, plus a REV64 lane swap on big endian.
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
          (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
// Single-lane i64 vectors need no REV even on big endian.
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

// Scalar int <-> float bitconverts are plain cross-bank register copies.
def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

// f16 and bf16 share the FPR16 register class, so converting between them
// is free.
def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;

// Bitconverts to v1i64: identity on little endian; a single REV64 (element
// size taken from the source type) on big endian.
let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
          (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;

// Bitconverts to v2i32.
let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
          (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

// Bitconverts to v4i16.
let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
          (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;

// Bitconverts to v4f16 and v4bf16 (same lane layout as v4i16).
let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
          (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
          (v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;

// Bitconverts to v8i8.
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
}

// Bitconverts to f64.
let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
          (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

// Bitconverts to v1f64.
let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
          (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

// Bitconverts to v2f32.
let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
          (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

// 128-bit bitconverts to f128. There is no 128-bit REV, so on big endian the
// swap is synthesized with EXT (and per-lane REV64 where the lanes are
// narrower than 64 bits).
let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                          (REV64v16i8 FPR128:$src), (i32 8)))>;
}

// Bitconverts to v2f64.
let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
          (v2f64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
          (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

// Bitconverts to v4f32.
let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
          (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
          (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

// Bitconverts to v2i64.
let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
          (v2i64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
          (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

// Bitconverts to v4i32.
let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
          (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
          (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

// Bitconverts to v8i16.
let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
          (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
          (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;

// Bitconverts to v8f16 and v8bf16 (same lane layout as v8i16).
let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;

def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
          (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
          (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;

def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
          (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                            (REV64v8i16 FPR128:$src),
                            (i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
          (v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;

// Bitconverts to v16i8.
let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
          (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
}

// Extracting the low 64 bits of a 128-bit vector is just a subregister copy.
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;

// Extracting the high half: duplicate 64-bit lane 1 down, then take dsub.
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

// Instantiate for both i32 and i64 insertion indices.
defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
   // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
   // so we match on v4f32 here, not v2f32. This will also catch adding
   // the low two lanes of a true v4f32 vector.
8989def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), 8990 (vector_extract (v4f32 FPR128:$Rn), (i64 1))), 8991 (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; 8992def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), 8993 (vector_extract (v8f16 FPR128:$Rn), (i64 1))), 8994 (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; 8995 8996// Prefer using the bottom lanes of addp Rn, Rn compared to 8997// addp extractlow(Rn), extracthigh(Rn) 8998def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))), 8999 (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))), 9000 (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>; 9001def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))), 9002 (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))), 9003 (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>; 9004def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))), 9005 (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))), 9006 (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>; 9007 9008def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))), 9009 (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))), 9010 (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>; 9011def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))), 9012 (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))), 9013 (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>; 9014 9015// Scalar 64-bit shifts in FPR64 registers. 
// Scalar 64-bit shifts on values held in FPR64 registers: the intrinsics take
// plain i64 operands, selected here to the v1i64 SIMD shift instructions.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
// Split a single 128-bit nontemporal store into an STNP of its two 64-bit
// halves (low half via dsub extract, high half via DUP of lane 1).
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

// Likewise split a 64-bit nontemporal store into an STNP of 32-bit halves.
class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

// GPR variant: store the low 32 bits and the (UBFM-extracted) high 32 bits
// of an X register as an STNP of two W registers.
def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call limited to only use registers (x16 and x17) which are
  // allowed to tail-call a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
// NOTE(review): the tglobaladdr pattern below emits a texternalsym operand;
// this mismatch looks deliberate (both lower the same way here) — confirm.
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

// Pseudo used to materialize an MC symbol's value into a GPR64; selected from
// the AArch64LocalRecover node below.
def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy propagation,
// to reason about, so is preferred when it's possible to use it.
9101let AddedComplexity = 10 in { 9102 def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>; 9103 def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>; 9104 def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>; 9105} 9106 9107// dot_v4i8 9108class mul_v4i8<SDPatternOperator ldop> : 9109 PatFrag<(ops node:$Rn, node:$Rm, node:$offset), 9110 (mul (ldop (add node:$Rn, node:$offset)), 9111 (ldop (add node:$Rm, node:$offset)))>; 9112class mulz_v4i8<SDPatternOperator ldop> : 9113 PatFrag<(ops node:$Rn, node:$Rm), 9114 (mul (ldop node:$Rn), (ldop node:$Rm))>; 9115 9116def load_v4i8 : 9117 OutPatFrag<(ops node:$R), 9118 (INSERT_SUBREG 9119 (v2i32 (IMPLICIT_DEF)), 9120 (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)), 9121 ssub)>; 9122 9123class dot_v4i8<Instruction DOT, SDPatternOperator ldop> : 9124 Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)), 9125 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)), 9126 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)), 9127 (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))), 9128 (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR), 9129 (load_v4i8 GPR64sp:$Rn), 9130 (load_v4i8 GPR64sp:$Rm))), 9131 sub_32)>, Requires<[HasDotProd]>; 9132 9133// dot_v8i8 9134class ee_v8i8<SDPatternOperator extend> : 9135 PatFrag<(ops node:$V, node:$K), 9136 (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>; 9137 9138class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> : 9139 PatFrag<(ops node:$M, node:$N, node:$K), 9140 (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)), 9141 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>; 9142 9143class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> : 9144 PatFrag<(ops node:$M, node:$N), 9145 (i32 (extractelt 9146 (v4i32 (AArch64uaddv 9147 (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)), 9148 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))), 
9149 (i64 0)))>; 9150 9151// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 9152def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>; 9153 9154class odot_v8i8<Instruction DOT> : 9155 OutPatFrag<(ops node:$Vm, node:$Vn), 9156 (EXTRACT_SUBREG 9157 (VADDV_32 9158 (i64 (DOT (DUPv2i32gpr WZR), 9159 (v8i8 node:$Vm), 9160 (v8i8 node:$Vn)))), 9161 sub_32)>; 9162 9163class dot_v8i8<Instruction DOT, SDPatternOperator mulop, 9164 SDPatternOperator extend> : 9165 Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn), 9166 (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>, 9167 Requires<[HasDotProd]>; 9168 9169// dot_v16i8 9170class ee_v16i8<SDPatternOperator extend> : 9171 PatFrag<(ops node:$V, node:$K1, node:$K2), 9172 (v4i16 (extract_subvector 9173 (v8i16 (extend 9174 (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>; 9175 9176class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> : 9177 PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2), 9178 (v4i32 9179 (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)), 9180 (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>; 9181 9182class idot_v16i8<SDPatternOperator m, SDPatternOperator x> : 9183 PatFrag<(ops node:$M, node:$N), 9184 (i32 (extractelt 9185 (v4i32 (AArch64uaddv 9186 (add 9187 (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)), 9188 (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))), 9189 (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)), 9190 (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))), 9191 (i64 0)))>; 9192 9193class odot_v16i8<Instruction DOT> : 9194 OutPatFrag<(ops node:$Vm, node:$Vn), 9195 (i32 (ADDVv4i32v 9196 (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; 9197 9198class dot_v16i8<Instruction DOT, SDPatternOperator mulop, 9199 SDPatternOperator extend> : 9200 Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn), 9201 (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>, 9202 Requires<[HasDotProd]>; 9203 9204let AddedComplexity = 10 in { 
9205 def : dot_v4i8<SDOTv8i8, sextloadi8>; 9206 def : dot_v4i8<UDOTv8i8, zextloadi8>; 9207 def : dot_v8i8<SDOTv8i8, AArch64smull, sext>; 9208 def : dot_v8i8<UDOTv8i8, AArch64umull, zext>; 9209 def : dot_v16i8<SDOTv16i8, AArch64smull, sext>; 9210 def : dot_v16i8<UDOTv16i8, AArch64umull, zext>; 9211 9212 // FIXME: add patterns to generate vector by element dot product. 9213 // FIXME: add SVE dot-product patterns. 9214} 9215 9216// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, 9217// so that it can be used as input to inline asm, and vice versa. 9218def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>; 9219def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>; 9220def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, 9221 GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)), 9222 (REG_SEQUENCE GPR64x8Class, 9223 $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, 9224 $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>; 9225foreach i = 0-7 in { 9226 def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))), 9227 (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>; 9228} 9229 9230let Predicates = [HasLS64] in { 9231 def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), 9232 (outs GPR64x8:$Rt)>; 9233 def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), 9234 (outs)>; 9235 def ST64BV: Store64BV<0b011, "st64bv">; 9236 def ST64BV0: Store64BV<0b010, "st64bv0">; 9237 9238 class ST64BPattern<Intrinsic intrinsic, Instruction instruction> 9239 : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7), 9240 (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>; 9241 9242 def : ST64BPattern<int_aarch64_st64b, ST64B>; 9243 def : ST64BPattern<int_aarch64_st64bv, ST64BV>; 9244 def : 
ST64BPattern<int_aarch64_st64bv0, ST64BV0>; 9245} 9246 9247let Predicates = [HasMOPS] in { 9248 let Defs = [NZCV] in { 9249 defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; 9250 9251 defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; 9252 9253 defm SETP : MOPSMemorySetInsns<0b00, "setp">; 9254 } 9255 let Uses = [NZCV] in { 9256 defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; 9257 defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; 9258 9259 defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; 9260 defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; 9261 9262 defm SETM : MOPSMemorySetInsns<0b01, "setm">; 9263 defm SETE : MOPSMemorySetInsns<0b10, "sete">; 9264 } 9265} 9266let Predicates = [HasMOPS, HasMTE] in { 9267 let Defs = [NZCV] in { 9268 defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; 9269 } 9270 let Uses = [NZCV] in { 9271 defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; 9272 // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td 9273 defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; 9274 } 9275} 9276 9277// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain 9278// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain 9279def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; 9280def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; 9281def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; 9282def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; 9283def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; 9284 9285// MOPS operations always contain three 4-byte instructions 9286let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in { 9287 let mayLoad = 1 in { 9288 def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), 9289 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), 9290 [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = 
$Rn_wb">, Sched<[]>; 9291 def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), 9292 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), 9293 [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; 9294 } 9295 let mayLoad = 0 in { 9296 def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), 9297 (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), 9298 [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; 9299 } 9300} 9301let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in { 9302 def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), 9303 (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), 9304 [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; 9305} 9306 9307//----------------------------------------------------------------------------- 9308// v8.3 Pointer Authentication late patterns 9309 9310let Predicates = [HasPAuth] in { 9311def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm), 9312 (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>; 9313def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn), 9314 (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>; 9315} 9316 9317//----------------------------------------------------------------------------- 9318 9319// This gets lowered into an instruction sequence of 20 bytes 9320let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in 9321def StoreSwiftAsyncContext 9322 : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), 9323 []>, Sched<[]>; 9324 9325def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>; 9326def : Pat<(AArch64AssertZExtBool GPR32:$op), 9327 (i32 GPR32:$op)>; 9328 9329//===----------------------------===// 9330// 2022 Architecture Extensions: 9331//===----------------------------===// 9332 9333def : InstAlias<"clrbhb", (HINT 22), 0>; 9334let Predicates = [HasCLRBHB] in { 9335 def : InstAlias<"clrbhb", (HINT 22), 1>; 9336} 9337 
//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
// Read-check-write compare-and-swap, and the clr/set/swp RCW operations.
defm RCW : ReadCheckWriteCompareAndSwap;

defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
// CSSC scalar ops, selected directly from the generic ISD nodes.
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;

// RPRFM (register + register prefetch form). Modeled as a pure hint:
// mayLoad/mayStore are 0, only hasSideEffects is set.
def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg), when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
// 128-bit swap / bit-clear / bit-set; the A/L/AL suffixes select the
// acquire / release / acquire-release orderings via the second field.
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  // STILP: store-release pair; LDIAPP: load-acquire pair. The *pre/*post
  // variants write back the updated base register through $wback.
  //                                             size   opc    opc2
  def STILPWpre:  BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:  BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpost: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpost: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:    BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:    BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;

  // Single-register store-release / load-acquire with base writeback.
  //                                      size   opc
  def STLRWpre:   BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:   BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpost: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
  def LDAPRXpost: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}

let Predicates = [HasRCPC3, HasNEON] in {
  // FP/SIMD store-release (STLUR) and load-acquire (LDAPUR), unscaled offset.
  //                                              size   opc  regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8  :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8  :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  // Single-structure (one D-register lane) variants.
  //               L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn) , "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?)
    // and therefore autogenerates a decoder that builds an MC representation that has 4 fields
    // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one
    // extra for the XZR) because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc
    // is based off of the asm template (maybe) and therefore wants to print 5 operands.
    // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would
    // overlap with the main SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//           31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS 1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR 1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc).
let Predicates = [HasD128] in {
  // MRRS: read a 128-bit system register into an even/odd X register pair.
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  // MSRR: write an even/odd X register pair to a 128-bit system register.
  def MSRR : RtSystemI128<0,
    (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}

//===----------------------------===//
// 2023 Architecture Extensions:
//===----------------------------===//

let Predicates = [HasFP8] in {
  defm F1CVTL  : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
  defm F2CVTL  : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
  defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
  defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
  defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
  defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
  defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]

let Predicates = [HasFAMINMAX] in {
  defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
  defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
} // End let Predicates = [HasFAMINMAX]

let Predicates = [HasFP8FMA] in {
  // Indexed (by-element) FP8 multiply-accumulate variants.
  defm FMLALBlane  : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
  defm FMLALTlane  : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
  defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
  defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
  defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
  defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;

  // Vector (non-indexed) FP8 multiply-accumulate variants.
  defm FMLALB  : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
  defm FMLALT  : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
  defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
  defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
  defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
  defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
} // End let Predicates = [HasFP8FMA]

let Predicates = [HasFP8DOT2] in {
  defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
  defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
} // End let Predicates = [HasFP8DOT2]

let Predicates = [HasFP8DOT4] in {
  defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
  defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
} // End let Predicates = [HasFP8DOT4]

//===----------------------------------------------------------------------===//
// Checked Pointer Arithmetic (FEAT_CPA)
//===----------------------------------------------------------------------===//
let Predicates = [HasCPA] in {
  // Scalar add/subtract
  defm ADDPT : AddSubCPA<0, "addpt">;
  defm SUBPT : AddSubCPA<1, "subpt">;

  // Scalar multiply-add/subtract
  def MADDPT : MulAccumCPA<0, "maddpt">;
  def MSUBPT : MulAccumCPA<1, "msubpt">;
}

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"