//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
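// Each predicate below pairs a compile-time subtarget query (used when
// selecting patterns) with an assembler predicate (used to diagnose
// unavailable mnemonics in assembly input). A minimal sketch of how such a
// predicate gates a definition (hypothetical instruction, for exposition
// only):
//
//   let Predicates = [HasCRC] in
//   def ExampleInst : ...;  // matched and assembled only when +crc is active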
def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
               AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
                 AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3 : Predicate<"Subtarget->hasEL3()">,
             AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH : Predicate<"Subtarget->hasVH()">,
            AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR : Predicate<"Subtarget->hasLOR()">,
             AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
               AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
                 AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;

def HasJS : Predicate<"Subtarget->hasJS()">,
            AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
               AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
                   AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV : Predicate<"Subtarget->hasNV()">,
            AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
              AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT : Predicate<"Subtarget->hasDIT()">,
             AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
                   AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM : Predicate<"Subtarget->hasAM()">,
            AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
              AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
                 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
               AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
                   AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
                 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
              AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
             AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
             AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
             AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
              AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
             AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
             AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
             AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
                  AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
             AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                           "fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
             AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
              AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->hasSVE2p1()">,
                AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
"sve2-aes">; 146def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, 147 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">; 148def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, 149 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">; 150def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, 151 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; 152def HasB16B16 : Predicate<"Subtarget->hasB16B16()">, 153 AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">; 154def HasSME : Predicate<"Subtarget->hasSME()">, 155 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; 156def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">, 157 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">; 158def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">, 159 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">; 160def HasSMEFA64 : Predicate<"Subtarget->hasSMEFA64()">, 161 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">; 162def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">, 163 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">; 164def HasSME2 : Predicate<"Subtarget->hasSME2()">, 165 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">; 166def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">, 167 AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">; 168def HasFPMR : Predicate<"Subtarget->hasFPMR()">, 169 AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">; 170def HasFP8 : Predicate<"Subtarget->hasFP8()">, 171 AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">; 172def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">, 173 AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">; 174def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">, 175 AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">; 176def HasSSVE_FP8FMA : Predicate<"Subtarget->SSVE_FP8FMA() || " 177 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">, 178 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA, 179 (all_of FeatureSVE2, FeatureFP8FMA)), 180 "ssve-fp8fma or (sve2 and fp8fma)">; 181def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">, 182 AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">; 183def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || " 184 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">, 185 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2, 186 (all_of FeatureSVE2, FeatureFP8DOT2)), 187 "ssve-fp8dot2 or (sve2 and fp8dot2)">; 188def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">, 189 AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">; 190def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || " 191 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">, 192 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4, 193 (all_of FeatureSVE2, FeatureFP8DOT4)), 194 "ssve-fp8dot4 or (sve2 and fp8dot4)">; 195def HasLUT : Predicate<"Subtarget->hasLUT()">, 196 AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">; 197def HasSME_LUTv2 : Predicate<"Subtarget->hasSME_LUTv2()">, 198 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">; 199def HasSMEF8F16 : Predicate<"Subtarget->hasSMEF8F16()">, 200 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">; 201def HasSMEF8F32 : Predicate<"Subtarget->hasSMEF8F32()">, 202 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">; 203 204// A subset of SVE(2) instructions are legal in Streaming SVE execution mode, 205// they should be enabled if 
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVEorSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                                "sve2 or sme">;
def HasSVE2orSME2
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                                "sve2 or sme2">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                "neon or sme">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
              AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
                 AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
                   AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
            AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
                 AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
              AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
             AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
             AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
             AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
             AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
              AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
            AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
              AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
              AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
                 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
             AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
              AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
                AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
                  AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE : Predicate<"Subtarget->hasITE()">,
             AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE : Predicate<"Subtarget->hasTHE()">,
             AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
               AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
                AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
              AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK : Predicate<"Subtarget->hasCHK()">,
             AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS : Predicate<"Subtarget->hasGCS()">,
             AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA : Predicate<"Subtarget->hasCPA()">,
             AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;
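// As an illustration of the feature above: with negative immediates enabled
// (the default), the assembler can remap aliases such as "add w0, w1, #-8"
// to the equivalent "sub w0, w1, #8"; -mattr=+no-neg-immediates disables
// that remapping. (Informal example, not a definition.)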
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
                                    [SDTCisFP<0>,
                                     SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                   SDTCisSameAs<0,1>,
                                                   SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
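// Reading an SDTypeProfile: SDTypeProfile<NumResults, NumOperands, Constraints>,
// where constraint indices count results first, then operands. For instance,
// in SDT_AArch64CSel above, indices 1 and 2 (the two value operands) must
// match the type of result 0, index 3 is the condition-code integer, and
// index 4 is the i32 NZCV flags input.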
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
//
// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// The profile takes a single operand: the variable.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// Non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
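// These fragments are what SVE load patterns key off. A minimal sketch of the
// intended use (hypothetical instruction name and operand order, for
// exposition only):
//
//   def : Pat<(nxv8i16 (azext_masked_load_i8 GPR64sp:$base, nxv8i1:$pg, undef)),
//             (LD1B_H ...)>;  // select an extending LD1B for a zext load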
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// Non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// Truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
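// Note the deliberate split: nonext_masked_load / nontrunc_masked_store
// exclude non-temporal accesses, which are matched separately by
// non_temporal_load / non_temporal_store so they can select the
// non-temporal (LDNT1/STNT1-style) instructions instead.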
multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}

defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
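// Example of intended use: a pattern such as (mul top32Zero:$Rn, top32Zero:$Rm)
// only matches when known-bits analysis proves the upper 32 bits of both
// operands are zero, so the multiply can be selected as a cheaper 32x32->64
// form (UMULL-style). (Sketch; the concrete patterns live with the multiply
// instructions.)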
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                   SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call : SDNode<"AArch64ISD::CALL",
                         SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                          SDNPVariadic]>;

def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                  SDNPVariadic]>;

def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                           [SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                        [SDNPHasChain]>;
def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                         [SDNPHasChain]>;
def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                        [SDNPHasChain]>;
def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                         [SDNPHasChain]>;


def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn>;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                [SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
                               [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                (AArch64fcmp node:$lhs, node:$rhs)]>;
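// For orientation: AArch64csel corresponds to the CSEL family, e.g.
// "csel x0, x1, x2, eq" computes x0 = (NZCV is EQ) ? x1 : x2, while the
// ccmp/ccmn nodes model conditional compares that let chained &&/||
// conditions be evaluated without branches. (Informal description.)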
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
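// The cmtst fragment exploits the identity CMTST(a, b) == NOT(CMEQz(a & b)):
// a lane is all-ones exactly when (a & b) is non-zero, which is precisely
// the semantics of the CMTST instruction.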
SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; 753def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; 754 755def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; 756def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; 757 758def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, 759 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; 760 761def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, 762 [SDNPHasChain, SDNPSideEffect]>; 763 764def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; 765def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; 766 767def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", 768 SDT_AArch64TLSDescCallSeq, 769 [SDNPInGlue, SDNPOutGlue, SDNPHasChain, 770 SDNPVariadic]>; 771 772 773def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", 774 SDT_AArch64WrapperLarge>; 775 776def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; 777 778def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 779 SDTCisSameAs<1, 2>]>; 780def AArch64pmull : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull, 781 [SDNPCommutative]>; 782def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull, 783 [SDNPCommutative]>; 784def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull, 785 [SDNPCommutative]>; 786 787def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; 788def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; 789def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; 790def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; 791 792def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>; 793def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>; 794 795def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; 796def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; 797def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; 798def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; 799def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; 800def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; 801def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>; 802 803def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs), 804 [(abdu node:$lhs, node:$rhs), 805 (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; 806def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), 807 [(abds node:$lhs, node:$rhs), 808 (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; 809 810def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>; 811def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>; 812def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>; 813def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm), 814 [(AArch64addp_n node:$Rn, node:$Rm), 815 (int_aarch64_neon_addp node:$Rn, node:$Rm)]>; 816def AArch64uaddlp : PatFrags<(ops node:$src), 817 [(AArch64uaddlp_n node:$src), 818 (int_aarch64_neon_uaddlp node:$src)]>; 819def AArch64saddlp : PatFrags<(ops node:$src), 820 [(AArch64saddlp_n node:$src), 821 (int_aarch64_neon_saddlp node:$src)]>; 822def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm), 823 [(AArch64addp_n node:$Rn, node:$Rm), 824 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>; 825def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>; 826def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS), 827 [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)), 828 
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS),
                            [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
                             (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;
def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmax node:$Rn),
                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmin node:$Rn),
                               (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fmaximum node:$Rn),
                             (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fminimum node:$Rn),
                             (int_aarch64_neon_fminv node:$Rn)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
  SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;

def AArch64probedalloca
    : SDNode<"AArch64ISD::PROBED_ALLOCA",
             SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPMayStore]>;

def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
                                [(AArch64rshrnb node:$rs, node:$i),
                                 (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;

def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;
// Match an add node, and also treat an 'or' node as an 'add' when the or'ed
// operands have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   if (N->getOpcode() == ISD::ADD)
     return true;
   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
     // Only handle G_ADD for now. FIXME: build the capability to compute
     // whether operands of G_OR have common bits set or not.
     return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}

// Match a mul with enough sign bits; it can be reduced to a smaller mul operand.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
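// Worked examples of the two fragments above:
// - (or x, 0xF00) behaves exactly like (add x, 0xF00) whenever x is known to
//   have zeros in bits [11:8], so addressing patterns can match either form.
// - For i64 values a, b each with more than 32 sign bits (e.g. sign-extended
//   from i32), a*b can be selected as SMULL of the low 32-bit halves instead
//   of a full 64-bit multiply.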
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let hasSideEffects = 1, isCodeGenOnly = 1 in {
let Defs = [SP], Uses = [SP] in {
// We set Sched to the empty list because we expect these instructions to
// simply get removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                              Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                            Sched<[]>;

}

let Defs = [SP, NZCV], Uses = [SP] in {
// Probed stack allocation of a constant size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
                               (ins i64imm:$stacksize, i64imm:$fixed_offset,
                                i64imm:$scalable_offset),
                               []>,
                               Sched<[]>;

// Probed stack allocation of a variable size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC_VAR : Pseudo<(outs),
                                   (ins GPR64sp:$target),
                                   []>,
                                   Sched<[]>;

// Probed stack allocations of a variable size, used for allocas of unknown size
// when stack-clash protection is enabled.
let usesCustomInserter = 1 in
def PROBED_STACKALLOC_DYN : Pseudo<(outs),
                                   (ins GPR64common:$target),
                                   [(AArch64probedalloca GPR64common:$target)]>,
                                   Sched<[]>;

} // Defs = [SP, NZCV], Uses = [SP]
} // hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                                    tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                                    tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                                    tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                                    tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                                    tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                    texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
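// For reference, the MOVaddr-style pseudos above expand to an ADRP + ADD
// pair after selection, e.g.:
//   adrp x0, sym
//   add  x0, x0, :lo12:sym
// Keeping the pair as a single rematerializable pseudo lets it be cheaply
// recomputed instead of spilled.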
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                                tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// In general these get lowered into a sequence of three 4-byte instructions.
// A 32-bit jump table destination actually needs only 2 instructions, since
// we can use the table itself as a PC-relative base. But optimization occurs
// after branch relaxation, so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;

let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
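// Note on the trailing "0" in the first two aliases: InstAlias's third
// argument is the emit priority, and 0 marks an alias as parse-only. So
// without +bti, "bti" still assembles (to HINT #32) but the printer emits
// "hint #32"; with +bti, the Requires<[HasBTI]> aliases make the printer
// use the "bti" mnemonic.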
// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
                     [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
                     [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB : CRmSystemI<barrier_op, 0b110, "isb",
                     [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm = 0b0010;
  let Inst{12} = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0} = 0b11;
  let Inst{9-8} = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

}

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;


// ARMv9.4-A Guarded Control Stack
class GCSNoOp<bits<3> op2, string mnemonic>
    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
  let Inst{20-8} = 0b0100001110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}
def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;

class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
              list<dag> pattern = []>
    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;

class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
               list<dag> pattern = []>
    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
19), 0>; 1212def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>; 1213 1214def : TokenAlias<"DSYNC", "dsync">; 1215 1216let Uses = [X16], Defs = [X16], CRm = 0b0101 in { 1217 def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">; 1218} 1219def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>; 1220def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>; 1221 1222class GCSSt<string mnemonic, bits<3> op> 1223 : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> { 1224 bits<5> Rt; 1225 bits<5> Rn; 1226 let Inst{31-15} = 0b11011001000111110; 1227 let Inst{14-12} = op; 1228 let Inst{11-10} = 0b11; 1229 let Inst{9-5} = Rn; 1230 let Inst{4-0} = Rt; 1231 let Predicates = [HasGCS]; 1232} 1233def GCSSTR : GCSSt<"gcsstr", 0b000>; 1234def GCSSTTR : GCSSt<"gcssttr", 0b001>; 1235 1236 1237// ARMv8.2-A Dot Product 1238let Predicates = [HasDotProd] in { 1239defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>; 1240defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>; 1241defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>; 1242defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>; 1243} 1244 1245// ARMv8.6-A BFloat 1246let Predicates = [HasNEON, HasBF16] in { 1247defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">; 1248defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">; 1249def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">; 1250def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>; 1251def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>; 1252def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>; 1253def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>; 1254def BFCVTN : SIMD_BFCVTN; 1255def BFCVTN2 : SIMD_BFCVTN2; 1256 1257// Vector-scalar BFDOT: 1258// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit 1259// register (the instruction uses a single 32-bit lane from it), so the pattern 1260// is a bit tricky. 1261def : Pat<(v2f32 (int_aarch64_neon_bfdot 1262 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), 1263 (v4bf16 (bitconvert 1264 (v2i32 (AArch64duplane32 1265 (v4i32 (bitconvert 1266 (v8bf16 (insert_subvector undef, 1267 (v4bf16 V64:$Rm), 1268 (i64 0))))), 1269 VectorIndexS:$idx)))))), 1270 (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), 1271 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 1272 VectorIndexS:$idx)>; 1273} 1274 1275let Predicates = [HasNEONorSME, HasBF16] in { 1276def BFCVT : BF16ToSinglePrecision<"bfcvt">; 1277} 1278 1279// ARMv8.6A AArch64 matrix multiplication 1280let Predicates = [HasMatMulInt8] in { 1281def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>; 1282def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>; 1283def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>; 1284defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>; 1285defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>; 1286 1287// sudot lane has a pattern where usdot is expected (there is no sudot). 1288// The second operand is used in the dup operation to repeat the indexed 1289// element. 
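// For illustration (restating the pattern below, with the usdot intrinsic's
// assumed operand order of (accumulator, unsigned input, signed input)):
// a dag shaped like
//   int_aarch64_neon_usdot(acc, dup(Vm, lane), Vn)
// is selected to "sudot Vd.4s, Vn.16b, Vm.4b[lane]", so the dup'ed lane
// supplies the unsigned data and Vn the signed data.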
1290class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind, 1291 string rhs_kind, RegisterOperand RegType, 1292 ValueType AccumType, ValueType InputType> 1293 : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind, 1294 lhs_kind, rhs_kind, RegType, AccumType, 1295 InputType, null_frag> { 1296 let Pattern = [(set (AccumType RegType:$dst), 1297 (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd), 1298 (InputType (bitconvert (AccumType 1299 (AArch64duplane32 (v4i32 V128:$Rm), 1300 VectorIndexS:$idx)))), 1301 (InputType RegType:$Rn))))]; 1302} 1303 1304multiclass SIMDSUDOTIndex { 1305 def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>; 1306 def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>; 1307} 1308 1309defm SUDOTlane : SIMDSUDOTIndex; 1310 1311} 1312 1313// ARMv8.2-A FP16 Fused Multiply-Add Long 1314let Predicates = [HasNEON, HasFP16FML] in { 1315defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; 1316defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>; 1317defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>; 1318defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>; 1319defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>; 1320defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>; 1321defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>; 1322defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>; 1323} 1324 1325// Armv8.2-A Crypto extensions 1326let Predicates = [HasSHA3] in { 1327def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; 1328def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">; 1329def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">; 1330def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">; 1331def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">; 1332def EOR3 : CryptoRRRR_16B<0b00, "eor3">; 1333def BCAX : CryptoRRRR_16B<0b01, "bcax">; 1334def XAR : CryptoRRRi6<"xar">; 1335 1336class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy> 1337 : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))), 1338 (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>; 1339 1340def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), 1341 (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1342 1343def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>; 1344def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>; 1345def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>; 1346 1347def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>; 1348def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>; 1349def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>; 1350def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>; 1351 1352class EOR3_pattern<ValueType VecTy> 1353 : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)), 1354 (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; 1355 1356def : EOR3_pattern<v16i8>; 1357def : EOR3_pattern<v8i16>; 1358def : EOR3_pattern<v4i32>; 1359def : EOR3_pattern<v2i64>; 1360 1361class BCAX_pattern<ValueType VecTy> 1362 : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))), 1363 (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; 1364 1365def : 
BCAX_pattern<v16i8>; 1366def : BCAX_pattern<v8i16>; 1367def : BCAX_pattern<v4i32>; 1368def : BCAX_pattern<v2i64>; 1369 1370def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>; 1371def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>; 1372def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>; 1373def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>; 1374 1375def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>; 1376def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>; 1377def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>; 1378def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>; 1379 1380def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>; 1381def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>; 1382def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>; 1383def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>; 1384 1385def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), 1386 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1387 1388def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))), 1389 (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>; 1390 1391def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))), 1392 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1393 1394} // HasSHA3 1395 1396let Predicates = [HasSM4] in { 1397def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; 1398def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">; 1399def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; 1400def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; 1401def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; 1402def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; 1403def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; 1404def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; 1405def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; 1406 1407def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))), 1408 (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>; 1409 1410class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode> 1411 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1412 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1413 1414class SM3TT_pattern<Instruction INST, Intrinsic OpNode> 1415 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )), 1416 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; 1417 1418class SM4_pattern<Instruction INST, Intrinsic OpNode> 1419 : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1420 (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1421 1422def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>; 1423def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>; 1424 1425def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>; 1426def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>; 1427def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>; 1428def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>; 1429 1430def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>; 1431def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>; 1432} // HasSM4 1433 1434let Predicates = [HasRCPC] in { 1435 // v8.3 Release Consistent Processor Consistent support, optional in v8.2. 
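 // For example, "ldapr w0, [x1]" is a load-acquire like "ldar w0, [x1]",
 // but with the weaker RCpc (processor-consistent) ordering rather than
 // the RCsc ordering of LDAR.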
1436 def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; 1437 def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; 1438 def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; 1439 def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; 1440} 1441 1442// v8.3a complex add and multiply-accumulate. No predicate here, that is done 1443// inside the multiclass as the FP16 versions need different predicates. 1444defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, 1445 "fcmla", null_frag>; 1446defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, 1447 "fcadd", null_frag>; 1448defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">; 1449 1450let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1451 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1452 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; 1453 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1454 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; 1455 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1456 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; 1457 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1458 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; 1459} 1460 1461let Predicates = [HasComplxNum, HasNEON] in { 1462 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1463 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; 1464 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1465 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; 1466 foreach Ty = [v4f32, v2f64] in { 1467 def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), 1468 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; 1469 def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), 1470 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; 1471 } 1472} 1473 1474multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> { 1475 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1476 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; 1477 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1478 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; 1479 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1480 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; 1481 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1482 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; 1483} 1484 1485multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> { 1486 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1487 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; 1488 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1489 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; 1490 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1491 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; 1492 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1493 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; 1494} 1495 1496 
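// Note on the trailing rotation immediate used in the complex-number
// patterns above and below: for FCMLA, the values 0..3 select rotations
// #0/#90/#180/#270; for FCADD, 0 and 1 select #90 and #270, the only
// rotations that instruction encodes.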
1497let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1498 defm : FCMLA_PATS<v4f16, V64>; 1499 defm : FCMLA_PATS<v8f16, V128>; 1500 1501 defm : FCMLA_LANE_PATS<v4f16, V64, 1502 (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; 1503 defm : FCMLA_LANE_PATS<v8f16, V128, 1504 (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; 1505} 1506let Predicates = [HasComplxNum, HasNEON] in { 1507 defm : FCMLA_PATS<v2f32, V64>; 1508 defm : FCMLA_PATS<v4f32, V128>; 1509 defm : FCMLA_PATS<v2f64, V128>; 1510 1511 defm : FCMLA_LANE_PATS<v4f32, V128, 1512 (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; 1513} 1514 1515// v8.3a Pointer Authentication 1516// These instructions inhabit part of the hint space and so can be used for 1517// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is 1518// important for compatibility with other assemblers (e.g. GAS) when building 1519// software compatible with both CPUs that do or don't implement PA. 1520let Uses = [LR], Defs = [LR] in { 1521 def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; 1522 def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; 1523 let isAuthenticated = 1 in { 1524 def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; 1525 def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; 1526 } 1527} 1528let Uses = [LR, SP], Defs = [LR] in { 1529 def PACIASP : SystemNoOperands<0b001, "hint\t#25">; 1530 def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; 1531 let isAuthenticated = 1 in { 1532 def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; 1533 def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; 1534 } 1535} 1536let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { 1537 def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; 1538 def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; 1539 let isAuthenticated = 1 in { 1540 def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; 1541 def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; 1542 } 1543} 1544 1545let Uses = [LR], Defs = [LR], CRm = 0b0000 in { 1546 def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; 1547} 1548 1549// In order to be able to write readable assembly, LLVM should accept assembly 1550// inputs that use pointer authentication mnemonics, even with PA disabled. 1551// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1552// should not emit these mnemonics unless PA is enabled. 1553def : InstAlias<"paciaz", (PACIAZ), 0>; 1554def : InstAlias<"pacibz", (PACIBZ), 0>; 1555def : InstAlias<"autiaz", (AUTIAZ), 0>; 1556def : InstAlias<"autibz", (AUTIBZ), 0>; 1557def : InstAlias<"paciasp", (PACIASP), 0>; 1558def : InstAlias<"pacibsp", (PACIBSP), 0>; 1559def : InstAlias<"autiasp", (AUTIASP), 0>; 1560def : InstAlias<"autibsp", (AUTIBSP), 0>; 1561def : InstAlias<"pacia1716", (PACIA1716), 0>; 1562def : InstAlias<"pacib1716", (PACIB1716), 0>; 1563def : InstAlias<"autia1716", (AUTIA1716), 0>; 1564def : InstAlias<"autib1716", (AUTIB1716), 0>; 1565def : InstAlias<"xpaclri", (XPACLRI), 0>; 1566 1567// Pseudos 1568 1569let Uses = [LR, SP], Defs = [LR] in { 1570// Insertion point of LR signing code. 1571def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>; 1572// Insertion point of LR authentication code. 1573// The RET terminator of the containing machine basic block may be replaced 1574// with a combined RETA(A|B) instruction when rewriting this Pseudo. 
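// For example (illustrative lowering): an epilogue of "PAUTH_EPILOGUE ; RET"
// may be emitted either as "autiasp ; ret", or folded into the single
// combined "retaa" where that instruction is usable.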
1575def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>; 1576} 1577 1578// These pointer authentication instructions require armv8.3a 1579let Predicates = [HasPAuth] in { 1580 1581 // When PA is enabled, a better mnemonic should be emitted. 1582 def : InstAlias<"paciaz", (PACIAZ), 1>; 1583 def : InstAlias<"pacibz", (PACIBZ), 1>; 1584 def : InstAlias<"autiaz", (AUTIAZ), 1>; 1585 def : InstAlias<"autibz", (AUTIBZ), 1>; 1586 def : InstAlias<"paciasp", (PACIASP), 1>; 1587 def : InstAlias<"pacibsp", (PACIBSP), 1>; 1588 def : InstAlias<"autiasp", (AUTIASP), 1>; 1589 def : InstAlias<"autibsp", (AUTIBSP), 1>; 1590 def : InstAlias<"pacia1716", (PACIA1716), 1>; 1591 def : InstAlias<"pacib1716", (PACIB1716), 1>; 1592 def : InstAlias<"autia1716", (AUTIA1716), 1>; 1593 def : InstAlias<"autib1716", (AUTIB1716), 1>; 1594 def : InstAlias<"xpaclri", (XPACLRI), 1>; 1595 1596 multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm, 1597 SDPatternOperator op> { 1598 def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>; 1599 def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>; 1600 def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>; 1601 def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>; 1602 def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>; 1603 def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>; 1604 def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>; 1605 def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>; 1606 } 1607 1608 defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; 1609 defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; 1610 1611 def XPACI : ClearAuth<0, "xpaci">; 1612 def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>; 1613 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>; 1614 1615 def XPACD : ClearAuth<1, "xpacd">; 1616 def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>; 1617 def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>; 1618 1619 def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; 1620 1621 // Combined Instructions 1622 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1623 def BRAA : AuthBranchTwoOperands<0, 0, "braa">; 1624 def BRAB : AuthBranchTwoOperands<0, 1, "brab">; 1625 } 1626 let isCall = 1, Defs = [LR], Uses = [SP] in { 1627 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; 1628 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; 1629 } 1630 1631 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1632 def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; 1633 def BRABZ : AuthOneOperand<0b000, 1, "brabz">; 1634 } 1635 let isCall = 1, Defs = [LR], Uses = [SP] in { 1636 def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; 1637 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; 1638 } 1639 1640 let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1641 def RETAA : AuthReturn<0b010, 0, "retaa">; 1642 def RETAB : AuthReturn<0b010, 1, "retab">; 1643 def ERETAA : AuthReturn<0b100, 0, "eretaa">; 1644 def ERETAB : AuthReturn<0b100, 1, "eretab">; 1645 } 1646 1647 defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; 1648 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; 1649 1650} 1651 1652// v9.5-A pointer authentication extensions 1653 1654// Always accept "pacm" as an alias for "hint #39", but don't emit it when 1655// disassembling if we don't have the pauth-lr feature. 
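// As with the other hint-space instructions, the encoding executes as a NOP
// on cores without the feature, so "pacm" and "hint #39" assemble to the
// same bytes and only the preferred disassembly differs.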
1656let CRm = 0b0100 in { 1657 def PACM : SystemNoOperands<0b111, "hint\t#39">; 1658} 1659def : InstAlias<"pacm", (PACM), 0>; 1660 1661let Predicates = [HasPAuthLR] in { 1662 let Defs = [LR], Uses = [LR, SP] in { 1663 // opcode2, opcode, asm 1664 def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">; 1665 def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">; 1666 def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">; 1667 def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">; 1668 // opc, asm 1669 def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">; 1670 def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">; 1671 // opcode2, opcode, asm 1672 def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">; 1673 def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">; 1674 // opcode2, opcode, asm 1675 def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">; 1676 def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">; 1677 def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">; 1678 def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">; 1679 } 1680 1681 let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1682 // opc, op2, asm 1683 def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">; 1684 def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">; 1685 // op3, asm 1686 def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">; 1687 def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">; 1688 } 1689 def : InstAlias<"pacm", (PACM), 1>; 1690} 1691 1692 1693// v8.3a floating point conversion for javascript 1694let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in 1695def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, 1696 "fjcvtzs", 1697 [(set GPR32:$Rd, 1698 (int_aarch64_fjcvtzs FPR64:$Rn))]> { 1699 let Inst{31} = 0; 1700} // HasJS, HasFPARMv8 1701 1702// v8.4 Flag manipulation instructions 1703let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { 1704def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { 1705 let Inst{20-5} = 0b0000001000000000; 1706} 1707def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; 1708def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; 1709def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", 1710 "{\t$Rn, $imm, $mask}">; 1711} // HasFlagM 1712 1713// v8.5 flag manipulation instructions 1714let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { 1715 1716def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { 1717 let Inst{18-16} = 0b000; 1718 let Inst{11-8} = 0b0000; 1719 let Unpredictable{11-8} = 0b1111; 1720 let Inst{7-5} = 0b001; 1721} 1722 1723def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { 1724 let Inst{18-16} = 0b000; 1725 let Inst{11-8} = 0b0000; 1726 let Unpredictable{11-8} = 0b1111; 1727 let Inst{7-5} = 0b010; 1728} 1729} // HasAltNZCV 1730 1731 1732// Armv8.5-A speculation barrier 1733def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { 1734 let Inst{20-5} = 0b0001100110000111; 1735 let Unpredictable{11-8} = 0b1111; 1736 let Predicates = [HasSB]; 1737 let hasSideEffects = 1; 1738} 1739 1740def : InstAlias<"clrex", (CLREX 0xf)>; 1741def : InstAlias<"isb", (ISB 0xf)>; 1742def : InstAlias<"ssbb", (DSB 0)>; 1743def : InstAlias<"pssbb", (DSB 4)>; 1744def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; 1745 1746def MRS : MRSI; 
1747def MSR : MSRI; 1748def MSRpstateImm1 : MSRpstateImm0_1; 1749def MSRpstateImm4 : MSRpstateImm0_15; 1750 1751def : Pat<(AArch64mrs imm:$id), 1752 (MRS imm:$id)>; 1753 1754// The thread pointer (on Linux, at least, where this has been implemented) is 1755// TPIDR_EL0. 1756def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), 1757 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; 1758 1759// This gets lowered into a 24-byte instruction sequence 1760let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { 1761def KCFI_CHECK : Pseudo< 1762 (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>; 1763} 1764 1765let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { 1766def HWASAN_CHECK_MEMACCESS : Pseudo< 1767 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1768 [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1769 Sched<[]>; 1770} 1771 1772let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { 1773def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< 1774 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1775 [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1776 Sched<[]>; 1777} 1778 1779// The virtual cycle counter register is CNTVCT_EL0. 1780def : Pat<(readcyclecounter), (MRS 0xdf02)>; 1781 1782// FPCR register 1783let Uses = [FPCR] in 1784def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), 1785 [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, 1786 PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, 1787 Sched<[WriteSys]>; 1788let Defs = [FPCR] in 1789def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), 1790 [(int_aarch64_set_fpcr i64:$val)]>, 1791 PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, 1792 Sched<[WriteSys]>; 1793 1794// Generic system instructions 1795def SYSxt : SystemXtI<0, "sys">; 1796def SYSLxt : SystemLXtI<1, "sysl">; 1797 1798def : InstAlias<"sys $op1, $Cn, $Cm, $op2", 1799 (SYSxt imm0_7:$op1, sys_cr_op:$Cn, 1800 sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 1801 1802 1803let Predicates = [HasTME] in { 1804 1805def TSTART : TMSystemI<0b0000, "tstart", 1806 [(set GPR64:$Rt, (int_aarch64_tstart))]>; 1807 1808def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; 1809 1810def TCANCEL : TMSystemException<0b011, "tcancel", 1811 [(int_aarch64_tcancel timm64_0_65535:$imm)]>; 1812 1813def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { 1814 let mayLoad = 0; 1815 let mayStore = 0; 1816} 1817} // HasTME 1818 1819//===----------------------------------------------------------------------===// 1820// Move immediate instructions. 1821//===----------------------------------------------------------------------===// 1822 1823defm MOVK : InsertImmediate<0b11, "movk">; 1824defm MOVN : MoveImmediate<0b00, "movn">; 1825 1826let PostEncoderMethod = "fixMOVZ" in 1827defm MOVZ : MoveImmediate<0b10, "movz">; 1828 1829// First group of aliases covers an implicit "lsl #0". 1830def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; 1831def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; 1832def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1833def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1834def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1835def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1836 1837// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
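// For example (illustrative), "movz x0, #:abs_g2:sym" loads bits [47:32] of
// sym's absolute address into the g2 slice of x0; the ":..._nc:" forms are
// the "no check" variants that suppress overflow checking, as used for movk
// fill-ins of the remaining slices.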
1838def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1839def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1840def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1841def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1842 1843def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1844def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1845def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1846def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1847 1848def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; 1849def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; 1850def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; 1851def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; 1852 1853def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1854def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1855 1856def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1857def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1858 1859def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; 1860def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; 1861 1862// Final group of aliases covers true "mov $Rd, $imm" cases. 1863multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, 1864 int width, int shift> { 1865 def _asmoperand : AsmOperandClass { 1866 let Name = basename # width # "_lsl" # shift # "MovAlias"; 1867 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " 1868 # shift # ">"; 1869 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; 1870 } 1871 1872 def _movimm : Operand<i32> { 1873 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); 1874 } 1875 1876 def : InstAlias<"mov $Rd, $imm", 1877 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; 1878} 1879 1880defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; 1881defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; 1882 1883defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; 1884defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; 1885defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; 1886defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; 1887 1888defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; 1889defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; 1890 1891defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; 1892defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; 1893defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; 1894defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; 1895 1896let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, 1897 isAsCheapAsAMove = 1 in { 1898// FIXME: The following pseudo instructions are only needed because remat 1899// cannot handle multiple instructions. When that changes, we can select 1900// directly to the real instructions and get rid of these pseudos. 
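// Illustrative example: a pseudo such as "MOVi64imm x0, 0x0000ffff00001234"
// is later expanded by the pseudo-expansion pass into a MOVZ/MOVK sequence
// (or a single ORR with a logical immediate when the constant permits),
// while remaining a single rematerializable instruction until then.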
1901 1902def MOVi32imm 1903 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), 1904 [(set GPR32:$dst, imm:$src)]>, 1905 Sched<[WriteImm]>; 1906def MOVi64imm 1907 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), 1908 [(set GPR64:$dst, imm:$src)]>, 1909 Sched<[WriteImm]>; 1910} // isReMaterializable, isCodeGenOnly 1911 1912// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the 1913// eventual expansion code fewer bits to worry about getting right. Marshalling 1914// the types is a little tricky though: 1915def i64imm_32bit : ImmLeaf<i64, [{ 1916 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 1917}]>; 1918 1919def s64imm_32bit : ImmLeaf<i64, [{ 1920 int64_t Imm64 = static_cast<int64_t>(Imm); 1921 return Imm64 >= std::numeric_limits<int32_t>::min() && 1922 Imm64 <= std::numeric_limits<int32_t>::max(); 1923}]>; 1924 1925def trunc_imm : SDNodeXForm<imm, [{ 1926 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); 1927}]>; 1928 1929def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, 1930 GISDNodeXFormEquiv<trunc_imm>; 1931 1932let Predicates = [OptimizedGISelOrOtherSelector] in { 1933// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless 1934// copies. 1935def : Pat<(i64 i64imm_32bit:$src), 1936 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; 1937} 1938 1939// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 1940def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 1941return CurDAG->getTargetConstant( 1942 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 1943}]>; 1944 1945def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 1946return CurDAG->getTargetConstant( 1947 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 1948}]>; 1949 1950 1951def : Pat<(f32 fpimm:$in), 1952 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; 1953def : Pat<(f64 fpimm:$in), 1954 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; 1955 1956 1957// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 1958// sequences. 1959def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 1960 tglobaladdr:$g1, tglobaladdr:$g0), 1961 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), 1962 tglobaladdr:$g1, 16), 1963 tglobaladdr:$g2, 32), 1964 tglobaladdr:$g3, 48)>; 1965 1966def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 1967 tblockaddress:$g1, tblockaddress:$g0), 1968 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), 1969 tblockaddress:$g1, 16), 1970 tblockaddress:$g2, 32), 1971 tblockaddress:$g3, 48)>; 1972 1973def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 1974 tconstpool:$g1, tconstpool:$g0), 1975 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), 1976 tconstpool:$g1, 16), 1977 tconstpool:$g2, 32), 1978 tconstpool:$g3, 48)>; 1979 1980def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 1981 tjumptable:$g1, tjumptable:$g0), 1982 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), 1983 tjumptable:$g1, 16), 1984 tjumptable:$g2, 32), 1985 tjumptable:$g3, 48)>; 1986 1987 1988//===----------------------------------------------------------------------===// 1989// Arithmetic instructions. 1990//===----------------------------------------------------------------------===// 1991 1992// Add/subtract with carry. 
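// Typical use (illustrative) is multi-word arithmetic, e.g. a 128-bit add
// split across register pairs:
//   adds x0, x0, x2    // low 64 bits, sets the carry flag
//   adc  x1, x1, x3    // high 64 bits, consumes the carry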
1993defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 1994defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 1995 1996def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 1997def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 1998def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 1999def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 2000 2001// Add/subtract 2002defm ADD : AddSub<0, "add", "sub", add>; 2003defm SUB : AddSub<1, "sub", "add">; 2004 2005def : InstAlias<"mov $dst, $src", 2006 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 2007def : InstAlias<"mov $dst, $src", 2008 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 2009def : InstAlias<"mov $dst, $src", 2010 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 2011def : InstAlias<"mov $dst, $src", 2012 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 2013 2014defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; 2015defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; 2016 2017def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ 2018 return N->getOpcode() == ISD::CopyFromReg && 2019 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; 2020}]>; 2021 2022// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 2023def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 2024 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 2025def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 2026 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 2027def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 2028 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 2029def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 2030 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 2031def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 2032 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 2033def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 2034 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 2035let AddedComplexity = 1 in { 2036def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), 2037 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; 2038def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), 2039 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; 2040def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), 2041 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; 2042} 2043 2044// Because of the immediate format for add/sub-imm instructions, the 2045// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 2046// These patterns capture that transformation. 2047let AddedComplexity = 1 in { 2048def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2049 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2050def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2051 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2052def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2053 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2054def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2055 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2056} 2057 2058// Because of the immediate format for add/sub-imm instructions, the 2059// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 2060// These patterns capture that transformation. 
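// For instance, "add w0, w1, #-5" has no encoding, so (add x, -5) is
// selected as the equivalent "sub w0, w1, #5" (and likewise adds/subs for
// the flag-setting forms below).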
2061let AddedComplexity = 1 in { 2062def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2063 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2064def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2065 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2066def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 2067 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 2068def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 2069 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 2070} 2071 2072def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 2073def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 2074def : InstAlias<"neg $dst, $src$shift", 2075 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 2076def : InstAlias<"neg $dst, $src$shift", 2077 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 2078 2079def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 2080def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 2081def : InstAlias<"negs $dst, $src$shift", 2082 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 2083def : InstAlias<"negs $dst, $src$shift", 2084 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 2085 2086 2087// Unsigned/Signed divide 2088defm UDIV : Div<0, "udiv", udiv>; 2089defm SDIV : Div<1, "sdiv", sdiv>; 2090 2091def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; 2092def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; 2093def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; 2094def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; 2095 2096// Variable shift 2097defm ASRV : Shift<0b10, "asr", sra>; 2098defm LSLV : Shift<0b00, "lsl", shl>; 2099defm LSRV : Shift<0b01, "lsr", srl>; 2100defm RORV : Shift<0b11, "ror", rotr>; 2101 2102def : ShiftAlias<"asrv", ASRVWr, GPR32>; 2103def : ShiftAlias<"asrv", ASRVXr, GPR64>; 2104def : ShiftAlias<"lslv", LSLVWr, GPR32>; 2105def : ShiftAlias<"lslv", LSLVXr, GPR64>; 2106def : ShiftAlias<"lsrv", LSRVWr, GPR32>; 2107def : ShiftAlias<"lsrv", LSRVXr, GPR64>; 2108def : ShiftAlias<"rorv", RORVWr, GPR32>; 2109def : ShiftAlias<"rorv", RORVXr, GPR64>; 2110 2111// Multiply-add 2112let AddedComplexity = 5 in { 2113defm MADD : MulAccum<0, "madd">; 2114defm MSUB : MulAccum<1, "msub">; 2115 2116def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), 2117 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 2118def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), 2119 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 2120 2121def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), 2122 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 2123def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), 2124 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 2125def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), 2126 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 2127def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), 2128 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 2129} // AddedComplexity = 5 2130 2131let AddedComplexity = 5 in { 2132def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; 2133def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; 2134def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; 2135def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; 2136 2137def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), 2138 (SMADDLrrr 
(EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2139def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), 2140 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2141def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), 2142 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2143def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), 2144 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2145def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), 2146 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2147def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), 2148 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2149 2150def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), 2151 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2152def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), 2153 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2154 2155def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), 2156 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2157def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), 2158 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2159def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), 2160 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2161 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2162 2163def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2164 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2165def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2166 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2167def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), 2168 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2169 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2170 2171def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), 2172 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2173def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), 2174 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2175def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), 2176 GPR64:$Ra)), 2177 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2178 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2179 2180def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2181 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2182def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2183 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2184def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), 2185 (s64imm_32bit:$C)))), 2186 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2187 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2188 2189def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)), 2190 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2191def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))), 2192 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2193 2194def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)), 2195 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2196def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)), 2197 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2198 2199def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2200 
(SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2201def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2202 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2203 2204def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2205 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2206def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2207 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2208 2209def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), 2210 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2211def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), 2212 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2213 2214def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), 2215 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2216def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), 2217 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2218 2219def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), 2220 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2221def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2222 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2223 2224def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), 2225 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2226def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2227 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2228} // AddedComplexity = 5 2229 2230def : MulAccumWAlias<"mul", MADDWrrr>; 2231def : MulAccumXAlias<"mul", MADDXrrr>; 2232def : MulAccumWAlias<"mneg", MSUBWrrr>; 2233def : MulAccumXAlias<"mneg", MSUBXrrr>; 2234def : WideMulAccumAlias<"smull", SMADDLrrr>; 2235def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 2236def : WideMulAccumAlias<"umull", UMADDLrrr>; 2237def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 2238 2239// Multiply-high 2240def SMULHrr : MulHi<0b010, "smulh", mulhs>; 2241def UMULHrr : MulHi<0b110, "umulh", mulhu>; 2242 2243// CRC32 2244def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 2245def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 2246def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 2247def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 2248 2249def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 2250def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 2251def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 2252def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; 2253 2254// v8.1 atomic CAS 2255defm CAS : CompareAndSwap<0, 0, "">; 2256defm CASA : CompareAndSwap<1, 0, "a">; 2257defm CASL : CompareAndSwap<0, 1, "l">; 2258defm CASAL : CompareAndSwap<1, 1, "al">; 2259 2260// v8.1 atomic CASP 2261defm CASP : CompareAndSwapPair<0, 0, "">; 2262defm CASPA : CompareAndSwapPair<1, 0, "a">; 2263defm CASPL : CompareAndSwapPair<0, 1, "l">; 2264defm CASPAL : CompareAndSwapPair<1, 1, "al">; 2265 2266// v8.1 atomic SWP 2267defm SWP : Swap<0, 0, "">; 2268defm SWPA : Swap<1, 0, "a">; 2269defm SWPL : Swap<0, 1, "l">; 2270defm SWPAL : Swap<1, 1, "al">; 2271 2272// v8.1 atomic LD<OP>(register). 
Performs load and then ST<OP>(register) 2273defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; 2274defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; 2275defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; 2276defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; 2277 2278defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; 2279defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; 2280defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; 2281defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; 2282 2283defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; 2284defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; 2285defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; 2286defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; 2287 2288defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; 2289defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; 2290defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; 2291defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; 2292 2293defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; 2294defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; 2295defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; 2296defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; 2297 2298defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; 2299defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; 2300defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; 2301defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; 2302 2303defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; 2304defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; 2305defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; 2306defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; 2307 2308defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; 2309defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; 2310defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; 2311defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; 2312 2313// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" 2314defm : STOPregister<"stadd","LDADD">; // STADDx 2315defm : STOPregister<"stclr","LDCLR">; // STCLRx 2316defm : STOPregister<"steor","LDEOR">; // STEORx 2317defm : STOPregister<"stset","LDSET">; // STSETx 2318defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx 2319defm : STOPregister<"stsmin","LDSMIN">;// STSMINx 2320defm : STOPregister<"stumax","LDUMAX">;// STUMAXx 2321defm : STOPregister<"stumin","LDUMIN">;// STUMINx 2322 2323// v8.5 Memory Tagging Extension 2324let Predicates = [HasMTE] in { 2325 2326def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", 2327 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; 2328 2329def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", 2330 int_aarch64_gmi, GPR64sp>, Sched<[]> { 2331 let isNotDuplicable = 1; 2332} 2333def ADDG : AddSubG<0, "addg", null_frag>; 2334def SUBG : AddSubG<1, "subg", null_frag>; 2335 2336def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; 2337 2338def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; 2339def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ 2340 let Defs = [NZCV]; 2341} 2342 2343def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; 2344 2345def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; 2346 2347def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), 2348 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; 2349def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2350 (LDG GPR64:$Rt, 
GPR64sp:$Rn, simm9s16:$offset)>; 2351 2352def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; 2353 2354def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", 2355 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; 2356def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", 2357 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; 2358def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", 2359 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { 2360 let Inst{23} = 0; 2361} 2362 2363defm STG : MemTagStore<0b00, "stg">; 2364defm STZG : MemTagStore<0b01, "stzg">; 2365defm ST2G : MemTagStore<0b10, "st2g">; 2366defm STZ2G : MemTagStore<0b11, "stz2g">; 2367 2368def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2369 (STGi $Rn, $Rm, $imm)>; 2370def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2371 (STZGi $Rn, $Rm, $imm)>; 2372def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2373 (ST2Gi $Rn, $Rm, $imm)>; 2374def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2375 (STZ2Gi $Rn, $Rm, $imm)>; 2376 2377defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; 2378def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; 2379def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; 2380 2381def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2382 (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2383 2384def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), 2385 (STGPi $Rt, $Rt2, $Rn, $imm)>; 2386 2387def IRGstack 2388 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, 2389 Sched<[]>; 2390def TAGPstack 2391 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, 2392 Sched<[]>; 2393 2394// Explicit SP in the first operand prevents ShrinkWrap optimization 2395// from leaving this instruction out of the stack frame. When IRGstack 2396// is transformed into IRG, this operand is replaced with the actual 2397// register / expression for the tagged base pointer of the current function. 2398def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; 2399 2400// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. 2401// $Rn_wback is one past the end of the range. $Rm is the loop counter. 2402let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in { 2403def STGloop_wback 2404 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2405 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2406 Sched<[WriteAdr, WriteST]>; 2407 2408def STZGloop_wback 2409 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2410 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2411 Sched<[WriteAdr, WriteST]>; 2412 2413// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. 2414// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). 
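// Roughly (illustrative shape only; the exact opcodes are chosen by the
// pseudo-expansion pass), the loop emitted for these pseudos looks like:
//   loop:
//     st2g x1, [x1], #32    // tag 32 bytes and advance the cursor
//     subs x0, x0, #32      // remaining $sz counts down to zero
//     b.ne loop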
2415def STGloop 2416 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2417 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2418 Sched<[WriteAdr, WriteST]>; 2419 2420def STZGloop 2421 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2422 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2423 Sched<[WriteAdr, WriteST]>; 2424} 2425 2426} // Predicates = [HasMTE] 2427 2428//===----------------------------------------------------------------------===// 2429// Logical instructions. 2430//===----------------------------------------------------------------------===// 2431 2432// (immediate) 2433defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; 2434defm AND : LogicalImm<0b00, "and", and, "bic">; 2435defm EOR : LogicalImm<0b10, "eor", xor, "eon">; 2436defm ORR : LogicalImm<0b01, "orr", or, "orn">; 2437 2438// FIXME: these aliases *are* canonical sometimes (when movz can't be 2439// used). Actually, it seems to be working right now, but putting logical_immXX 2440// here is a bit dodgy on the AsmParser side too. 2441def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, 2442 logical_imm32:$imm), 0>; 2443def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, 2444 logical_imm64:$imm), 0>; 2445 2446 2447// (register) 2448defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; 2449defm BICS : LogicalRegS<0b11, 1, "bics", 2450 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; 2451defm AND : LogicalReg<0b00, 0, "and", and>; 2452defm BIC : LogicalReg<0b00, 1, "bic", 2453 BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>; 2454defm EON : LogicalReg<0b10, 1, "eon", 2455 BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; 2456defm EOR : LogicalReg<0b10, 0, "eor", xor>; 2457defm ORN : LogicalReg<0b01, 1, "orn", 2458 BinOpFrag<(or node:$LHS, (not node:$RHS))>>; 2459defm ORR : LogicalReg<0b01, 0, "orr", or>; 2460 2461def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; 2462def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; 2463 2464def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; 2465def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; 2466 2467def : InstAlias<"mvn $Wd, $Wm$sh", 2468 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; 2469def : InstAlias<"mvn $Xd, $Xm$sh", 2470 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; 2471 2472def : InstAlias<"tst $src1, $src2", 2473 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; 2474def : InstAlias<"tst $src1, $src2", 2475 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; 2476 2477def : InstAlias<"tst $src1, $src2", 2478 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; 2479def : InstAlias<"tst $src1, $src2", 2480 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; 2481 2482def : InstAlias<"tst $src1, $src2$sh", 2483 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; 2484def : InstAlias<"tst $src1, $src2$sh", 2485 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; 2486 2487 2488def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; 2489def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; 2490 2491 2492//===----------------------------------------------------------------------===// 2493// One operand data processing instructions. 
//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS  : OneOperandData<0b000101, "cls">;
defm CLZ  : OneOperandData<0b000100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000000, "rbit", bitreverse>;

def REV16Wr : OneWRegData<0b000001, "rev16",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                    (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                    (i64 1))),
          (CLSXr GPR64:$Rn)>;
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
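// Worked example (a sketch, not from the original source): AArch64 has no
// direct count-trailing-zeros instruction, so the cttz patterns above select
//   cttz w0  ==>  rbit w0, w0 ; clz w0, w0
// i.e. bit-reverse first, then count leading zeros.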
// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit; they actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b000010, "rev", bswap>;
def REVXr   : OneXRegData<0b000011, "rev", bswap>;
def REV32Xr : OneXRegData<0b000010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr, so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
          (REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
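// Worked example (illustrative, not from the original source): a left shift
// by a constant becomes a UBFM whose bitfield wraps around, using the
// transforms above. For "shl w0, w1, #4": i32shift_a(4) = (32 - 4) & 31 = 28
// and i32shift_b(4) = 31 - 4 = 27, so the first pattern below emits
//   ubfm w0, w1, #28, #27   ; printed as "lsl w0, w1, #4"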
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_b imm0_63:$imm)))>;

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

//===----------------------------------------------------------------------===//
// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;

def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;

def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;

def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSELWr WZR, GPR32:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
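// Illustrative example (not from the original source): via inv_ccode the
// parser accepts "cset w0, eq" and encodes it as "csinc w0, wzr, wzr, ne" --
// the select yields 0 when NE holds and 0 + 1 when EQ holds.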
//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel,
                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call_bti GPR64noip:$Rn),
          (BLR_BTI GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;
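// For orientation (a sketch, not part of the original source): the 16-byte
// sequence this pseudo expands to is the standard TLSDESC call, roughly
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, :tlsdesc_lo12:var]
//   add  x0, x0, :tlsdesc_lo12:var
//   .tlsdesccall var
//   blr  x1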
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
def BRK : ExceptionGeneration<0b001, 0b00, "brk",
                              [(int_aarch64_break timm32_0_65535:$imm)]>;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;

def UDF : UDFType<0, "udf">;
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW  : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX  : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
defm LDPS  : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD  : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ  : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre  : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre  : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpre  : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre  : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre  : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost  : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost  : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpost  : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost  : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost  : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", i8, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
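// Illustrative note (not from the original source): the register-offset forms
// above accept an optionally extended and scaled index register, e.g.
//   ldr x0, [x1, x2]            ; LDRXroX, no extend
//   ldr x0, [x1, w2, sxtw #3]   ; LDRXroW, index sign-extended then scaled by 8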
// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;


def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all 64-bit-wide loads whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro64, v2i32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,   LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;
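// Illustrative note (not from the original source): for a scalar load that
// feeds only lane 0 of a vector, e.g. IR along the lines of
//   %s = load float, ptr %p
//   %v = insertelement <4 x float> undef, float %s, i64 0
// the ScalToVecROLoadPat patterns above select a single "ldr s0, [x0, x1]";
// the remaining lanes stay undefined (IMPLICIT_DEF).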
// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}


// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8,  zextloadi1, LDRBBroW, LDRBBroX>;
}
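// Illustrative note (not from the original source): the SUBREG_TO_REG wrapper
// in ExtLoadTo64ROPat relies on the architectural rule that writing a W
// register zeroes the upper 32 bits of the corresponding X register. Loading
// an i8 and zero-extending it to i64 therefore needs no extra instruction:
//   ldrb w0, [x1, x2]   ; x0 now holds the zero-extended byte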
//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                   [(set (f128 FPR128Op:$Rt),
                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
              (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
              (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
              (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
              (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
              (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
              (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
              (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
              (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                     [(set GPR64:$Rt,
                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                          (am_indexed64 GPR64sp:$Rn,
                                                        uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
                        [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
                        [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
                        [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
                        [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
                        [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
                         [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
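// Illustrative note (not from the original source): LDR (literal) encodes a
// 19-bit signed word offset that the hardware scales by 4, so the PC-relative
// target must be 4-byte aligned -- hence the alignment check in alignedglobal
// above.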
//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                          [(set GPR64z:$Rt,
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                          [(set GPR32z:$Rt,
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                          [(set FPR8Op:$Rt,
                                (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                          [(set (f16 FPR16Op:$Rt),
                                (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                          [(set (f32 FPR32Op:$Rt),
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                          [(set (f64 FPR64Op:$Rt),
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                          [(set (f128 FPR128Op:$Rt),
                                (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
                   [(set GPR32:$Rt,
                         (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
                   [(set GPR32:$Rt,
                         (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;

//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand   : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand  : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand  : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand  : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
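// Illustrative example (not from the original source): "ldr x0, [x1, #1]" has
// an offset that is not a multiple of 8, so it cannot use the scaled LDR
// encoding; via the aliases above the assembler silently emits
// "ldur x0, [x1, #1]" instead.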
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                   [(set GPR32:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                   [(set GPR64:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                   [(set GPR64:$Rt,
                         (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// An LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
// load, 0) can use a single load.
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  // Scaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // Half-vector patterns
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // SVE patterns
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}

defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
                              am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
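// Illustrative note (not from the original source): these patterns exploit
// the fact that a scalar FP/SIMD load writes the whole vector register and
// zeroes the bits it does not load. Building a v4i32 that is zero except for
// lane 0 loaded from memory is therefore just "ldr s0, [x0]"; SUBREG_TO_REG
// then records that the upper lanes are already zero.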

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                              [(AArch64Prefetch timm:$Rt,
                                                (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
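// Illustrative note (not from the original source): the pre-indexed forms
// above update the base register before the access; the post-indexed forms
// below update it afterwards:
//   ldr x0, [x1, #8]!   ; x1 += 8, then load from the new x1
//   ldr x0, [x1], #8    ; load from x1, then x1 += 8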
//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;


//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;


// Floating-point
defm STRB : Store8RO< 0b00,  1, 0b00, FPR8Op,   "str", i8, store>;
defm STRH : Store16RO<0b01,  1, 0b00, FPR16Op,  "str", f16, store>;
defm STRS : Store32RO<0b10,  1, 0b00, FPR32Op,  "str", f32, store>;
defm STRD : Store64RO<0b11,  1, 0b00, FPR64Op,  "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8,   FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
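// Storing lane 0 of a 128-bit vector is the same as a scalar FP store of the
// matching subregister (hsub/ssub/dsub), so the lane extract below is folded
// into a plain register-offset store of the narrower FP register.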
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              ValueType SubRegTy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16, store,         v8f16, f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4i32, i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4f32, f32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2i64, i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2f64, f64, i64, dsub, STRDroW, STRDroX>;
}

//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                     [(store GPR64z:$Rt,
                             (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                     [(store GPR32z:$Rt,
                             (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                      [(truncstorei16 GPR32z:$Rt,
                                      (am_indexed16 GPR64sp:$Rn,
                                                    uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
                      [(truncstorei8 GPR32z:$Rt,
                                     (am_indexed8 GPR64sp:$Rn,
                                                  uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all 64-bit-wide stores whose type is compatible with FPR64
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
          (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
          (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
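// VecStoreLane0Pat takes the addressing mode as a parameter, so the same
// multiclass is reused further below (via VecStoreULane0Pat) for the
// unscaled STUR forms.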
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            ValueType SubRegTy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16, store,         v8f16, f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4i32, i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4f32, f32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2i64, i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2f64, f64, i64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                           [(store GPR64z:$Rt,
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                           [(store GPR32z:$Rt,
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                           [(store FPR8Op:$Rt,
                                   (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                           [(store (f16 FPR16Op:$Rt),
                                   (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                           [(store (f32 FPR32Op:$Rt),
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                           [(store (f64 FPR64Op:$Rt),
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                           [(store (f128 FPR128Op:$Rt),
                                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                            [(truncstorei16 GPR32z:$Rt,
                                            (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                            [(truncstorei8 GPR32z:$Rt,
                                           (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;

// Armv8.4 Weaker Release Consistency enhancements
// LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB    : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH    : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW    : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX    : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB   : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH   : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR    : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>; 4030defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>; 4031} 4032 4033// Match all store 64 bits width whose type is compatible with FPR64 4034def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4035 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4036def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4037 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4038 4039let AddedComplexity = 10 in { 4040 4041let Predicates = [IsLE] in { 4042 // We must use ST1 to store vectors in big-endian. 4043 def : Pat<(store (v2f32 FPR64:$Rt), 4044 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4045 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4046 def : Pat<(store (v8i8 FPR64:$Rt), 4047 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4048 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4049 def : Pat<(store (v4i16 FPR64:$Rt), 4050 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4051 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4052 def : Pat<(store (v2i32 FPR64:$Rt), 4053 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4054 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4055 def : Pat<(store (v4f16 FPR64:$Rt), 4056 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4057 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4058 def : Pat<(store (v4bf16 FPR64:$Rt), 4059 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 4060 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4061} 4062 4063// Match all store 128 bits width whose type is compatible with FPR128 4064def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 4065 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 4066 4067let Predicates = [IsLE] in { 4068 // We must use ST1 to store vectors in big-endian. 
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             ValueType SubRegTy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, i64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
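// For example, "str x0, [x1, #-8]" has an offset that cannot be encoded as an
// unsigned scaled immediate, so it matches the aliases below and assembles to
// "stur x0, [x1, #-8]" instead.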
4127def : InstAlias<"str $Rt, [$Rn, $offset]", 4128 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 4129def : InstAlias<"str $Rt, [$Rn, $offset]", 4130 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 4131def : InstAlias<"str $Rt, [$Rn, $offset]", 4132 (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 4133def : InstAlias<"str $Rt, [$Rn, $offset]", 4134 (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 4135def : InstAlias<"str $Rt, [$Rn, $offset]", 4136 (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 4137def : InstAlias<"str $Rt, [$Rn, $offset]", 4138 (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 4139def : InstAlias<"str $Rt, [$Rn, $offset]", 4140 (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 4141 4142def : InstAlias<"strb $Rt, [$Rn, $offset]", 4143 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 4144def : InstAlias<"strh $Rt, [$Rn, $offset]", 4145 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 4146 4147//--- 4148// (unscaled immediate, unprivileged) 4149defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; 4150defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; 4151 4152defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; 4153defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; 4154 4155//--- 4156// (immediate pre-indexed) 4157def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; 4158def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; 4159def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>; 4160def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; 4161def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; 4162def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; 4163def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; 4164 4165def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; 4166def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; 4167 4168// truncstore i64 4169def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4170 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4171 simm9:$off)>; 4172def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4173 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4174 simm9:$off)>; 4175def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4176 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4177 simm9:$off)>; 4178 4179def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4180 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4181def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4182 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4183def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4184 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4185def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4186 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4187def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4188 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4189def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4190 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4191def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4192 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4193 4194def : Pat<(pre_store 
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;

def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//

def LDARW  : LoadAcquire<0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire<0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire<0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire<0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive<0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive<0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive<0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive<0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive<0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive<0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive<0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive<0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease<0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease<0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease<0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease<0b01, 1, 0, 0, 1, GPR32, "stlrh">;

/*
Aliases for when offset=0. Note that in contrast to LoadAcquire, which has a
$Rn of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and
add an alias for the immediate-#0 case. This is because the newer STLR
versions (from the LRCPC3 extension) can take a non-zero immediate offset, so
GPR64sp0 is no longer appropriate (it parses and discards the optional zero).
This is not the case for LoadAcquire, because the new LRCPC3 LDAR instructions
are post-indexed: their immediate values sit outside the [] brackets and are
therefore never accepted by the GPR64sp0 parser.
*/
def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",  (STLRW GPR32:$Rt, GPR64sp:$Rn)>;
def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",  (STLRX GPR64:$Rt, GPR64sp:$Rn)>;
def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32:$Rt, GPR64sp:$Rn)>;
def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32:$Rt, GPR64sp:$Rn)>;

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

let Predicates = [HasLOR] in {
  // v8.1a "Limited Order Region" extension load-acquire instructions
  def LDLARW : LoadAcquire<0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX : LoadAcquire<0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB : LoadAcquire<0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH : LoadAcquire<0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Order Region" extension store-release instructions
  def STLLRW : StoreRelease<0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX : StoreRelease<0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB : StoreRelease<0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH : StoreRelease<0b01, 1, 0, 0, 0, GPR32, "stllrh">;

  // Aliases for when offset=0
  def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",  (STLLRW GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",  (STLLRX GPR64:$Rt, GPR64sp:$Rn)>;
  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32:$Rt, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  }
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;

let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lround f16:$Rn)),
            (FCVTASUWHr f16:$Rn)>;
  def : Pat<(i64 (any_lround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
  def : Pat<(i64 (any_llround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
}
def : Pat<(i32 (any_lround f32:$Rn)),
          (FCVTASUWSr f32:$Rn)>;
def : Pat<(i32 (any_lround f64:$Rn)),
          (FCVTASUWDr f64:$Rn)>;
def : Pat<(i64 (any_lround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_lround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
def : Pat<(i64 (any_llround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_llround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;

//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
}

// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
      Requires<[HasFullFP16]>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;

def : Pat<(bf16 fpimm0),
          (FMOVH0)>;

// Pattern for FP16 and BF16 immediates
let Predicates = [HasFullFP16] in {
  def : Pat<(f16 fpimm:$in),
            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;

  def : Pat<(bf16 fpimm:$in),
            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
}

//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FCVT : FPConversion<"fcvt">;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;

defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;

let SchedRW = [WriteFDiv] in {
defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
}

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264

// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
          (FRINT32ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
          (FRINT64ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
          (FRINT32XDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
          (FRINT64XDr FPR64:$Rn)>;

// Emitting strict_lrint as two instructions is valid because any exception
// will be raised by exactly one of the two: if the input is not an integer,
// the inexact exception fires in the FRINTX but not in the FCVTZS, since the
// output of FRINTX is already an integer.
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lrint f16:$Rn)),
            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_lrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_llrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
}
def : Pat<(i32 (any_lrint f32:$Rn)),
          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
def : Pat<(i32 (any_lrint f64:$Rn)),
          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_lrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_lrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_llrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_llrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;

//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;

multiclass FMULScalarFromIndexedLane0Patterns<string inst,
                                              string inst_f16_suffix,
                                              string inst_f32_suffix,
                                              string inst_f64_suffix,
                                              SDPatternOperator OpNode,
                                              list<Predicate> preds = []> {
  let Predicates = !listconcat(preds, [HasFullFP16]) in {
  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f16_suffix)
               FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
  }
  let Predicates = preds in {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f32_suffix)
               FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f64_suffix)
               FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
  }
}

defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
                                          any_fmul>;

// Match reassociated forms of FNMUL.
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
          (FNMULHrr FPR16:$a, FPR16:$b)>,
          Requires<[HasFullFP16]>;
def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
          (FNMULSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
          (FNMULDrr FPR64:$a, FPR64:$b)>;

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (ins), unlike
// the NEON variant.
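// E.g. scalar "fmadd d0, d1, d2, d3" computes d0 = d3 + d1 * d2 (accumulator
// last), whereas NEON "fmla v0.2d, v1.2d, v2.2d" accumulates into its tied
// destination register v0.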

// Here we handle first a + (-b)*c for FMSUB:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now it's time for "(-a) + (-b)*c", i.e. -(a + b*c), for FNMADD:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

let Predicates = [HasFullFP16] in
def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                         (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}

//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//
let isPseudo = 1 in {
  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
}

// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
  def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
  let usesCustomInserter = 1 in
  def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
                 Sched<[]>;
}

// Pseudo instructions for homogeneous prolog/epilog
let isPseudo = 1 in {
  // Save CSRs in order, {FPOffset}
  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
  // Restore CSRs in order
  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

let Predicates = [HasFullFP16] in {
  def : Pat<(bf16 fpimmbf16:$in),
            (FMOVHi (fpimm16XForm bf16:$in))>;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                        AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
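// The xor/add forms below match the generic expansion of abs, i.e.
// abs(x) == (x + (x >>s (bits-1))) ^ (x >>s (bits-1)), applied to
// x == sub(zext($opA), zext($opB)), folding the whole tree to one UABDL.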
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                           (zext (v8i8 V64:$opB))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
                                (zext (v8i8 V64:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
                                (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                           (zext (v4i16 V64:$opB))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
                           (zext (v2i32 V64:$opB))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;

defm ABS  : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
defm CLS  : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ  : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT  : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;

def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
          (CMLTv8i8rz V64:$Rn)>;
def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
          (CMLTv4i16rz V64:$Rn)>;
def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
          (CMLTv2i32rz V64:$Rn)>;
def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
          (CMLTv16i8rz V128:$Rn)>;
def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
          (CMLTv8i16rz V128:$Rn)>;
def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
          (CMLTv4i32rz V128:$Rn)>;
def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
          (CMLTv2i64rz V128:$Rn)>;

defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
          (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
          (FCVTLv4i32 V128:$Rn)>;
def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
          (FCVTLv8i16 V128:$Rn)>;

defm FCVTMS : SIMDTwoVectorFPToInt<0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
          (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVT instructions saturate when out of range.
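// Because of that, the saturating fp-to-int nodes map onto single
// instructions. For example (a sketch), the IR
//   %v = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %x)
// is selected to one "fcvtzs v0.4s, v0.4s" by the multiclass below.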
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
  }
  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264

defm FRSQRTE : SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT   : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
defm NEG     : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT     : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
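// MVN is the architectural alias mnemonic for the vector NOT instruction, so
// e.g. "mvn v0.8b, v1.8b" assembles to (and is printed for) NOTv8i8.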
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
                                    BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd", int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
                                    BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd", int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of
// the definition.
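// As a concrete sketch of what the multiclass below matches: a widening
// shift by the element width, such as
//   (AArch64vshl (v8i16 (zext (v8i8 V64:$Rn))), (i32 8))
// becomes a single "shll v0.8h, v0.8b, #8"; the 4h->4s (#16) and
// 2s->2d (#32) widths and the sext/anyext variants work the same way.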
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;

// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN
// with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
// with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                                    (v8i16 VImm7F)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                                    (v8i16 VImm80)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                                     (v4i32 VImm7FFF)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                                     (v4i32 VImm8000)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

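// Taken together, these patterns turn the common clamp-then-truncate idiom
// into a single saturating narrow. For example (a sketch), the IR
//   %c = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> <i16 -128, ...>)
//   %d = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %c, <8 x i16> <i16 127, ...>)
//   %t = trunc <8 x i16> %d to <8 x i8>
// is expected to select to one "sqxtn v0.8b, v0.8h".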
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
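// In other words (a sketch): (fma $Rn, $Rm, $acc) in the DAG is selected to
// "fmla Vd, Vn, Vm", with the accumulator $acc tied to the destination Vd.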
defm FMLA    : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
                                         TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS    : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
                                         TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

defm FMULX   : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL    : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
defm FRECPS  : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB    : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;

// MLA and MLS are generated by the MachineCombiner pass.
defm MLA     : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS     : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL     : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL    : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA    : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
                                          TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD    : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD   : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB   : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP   : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX    : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP   : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN    : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD   : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH: SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL  : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL   : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB   : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD  : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL   : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL    : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB     : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA    : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
                                          TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD    : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD   : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB   : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP   : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX    : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP   : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN    : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD   : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL  : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL   : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB   : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD  : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
defm URSHL   : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL    : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturate patterns, other than the intrinsic matches above.
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}

defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
                                          int_aarch64_neon_fmulx,
                                          [HasNEONorSME]>;

def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                      int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ   : SIMDCmpTwoScalarD<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD<0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar<0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX : SIMDFPTwoScalar<0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar<1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm NEG    : SIMDTwoScalarD<1, 0b01011, "neg",
                             UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDFPTwoScalarCVT<0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS<0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS<1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied<0, 0b00011, "suqadd",
                                    int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied<1, 0b00011, "usqadd",
                                    int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
// If an integer is about to be converted to a floating point value,
// just load it directly into the floating point unit.
// Here are the patterns for 8- and 16-bit integers to float.
// 8-bit -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                     (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                     sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                     (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                     sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
              (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
              (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bit -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
              (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
              (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit -> float conversions are handled in the target-specific DAG combine:
// performIntToFpCombine.
// A 64-bit integer to 32-bit floating point conversion is not possible with
// UCVTF on floating point registers (source and destination must have the
// same size).

// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
// 8-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
              (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
              (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
              (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
              (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
              (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
              (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bit -> double conversions are handled in the target-specific DAG combine:
// performIntToFpCombine.
} // let Predicates = [HasNEON]

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL  : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                         AArch64sabd>;
defm SADDL  : SIMDLongThreeVectorBHS<0, 0b0000, "saddl",
                                     BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW  : SIMDWideThreeVectorBHS<0, 0b0001, "saddw",
                                     BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL  : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
                                         TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL  : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
                                         TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL  : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL  : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                                     BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW  : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                                     BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL  : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                             AArch64uabd>;
defm UADDL  : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                                     BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW  : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                                     BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL  : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
                                         TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL  : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
                                         TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL  : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL  : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                                     BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW  : SIMDWideThreeVectorBHS<1, 0b0011, "usubw",
                                     BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;

// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
                                       Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                     V64:$Ra,
                     (v4i16 (extract_subvector
                              (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                              (i64 0))))),
            (EXTRACT_SUBREG (v8i16 (INST8B
                              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                     V64:$Ra,
                     (v2i32 (extract_subvector
                              (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                              (i64 0))))),
            (EXTRACT_SUBREG (v4i32 (INST4H
                              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                     V64:$Ra,
                     (v1i64 (extract_subvector
                              (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                              (i64 0))))),
            (EXTRACT_SUBREG (v2i64 (INST2S
                              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                              V64:$Rn, V64:$Rm)), dsub)>;
}

defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;


multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;

  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
}

defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
defm : Neon_addl_extract_patterns<add, sext, "SADD">;
defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.

// Prioritize ADDHN and SUBHN over UZP2.
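// As a sketch of what these match: the IR sequence
//   %sum    = add <8 x i16> %a, %b
//   %shift  = lshr <8 x i16> %sum, <i16 8, ...>
//   %narrow = trunc <8 x i16> %shift to <8 x i8>
// selects to "addhn v0.8b, v1.8h, v2.8h" via the first pattern below, and
// the sub-based variants select to SUBHN the same way.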
let AddedComplexity = 10 in {

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
// These are the same as above, with an optional assertzext node that can be
// generated from fptoi lowering.
6021def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))), 6022 (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))), 6023 (UZP1v16i8 V128:$Vn, V128:$Vm)>; 6024def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))), 6025 (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))), 6026 (UZP1v8i16 V128:$Vn, V128:$Vm)>; 6027def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))), 6028 (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))), 6029 (UZP1v4i32 V128:$Vn, V128:$Vm)>; 6030 6031def : Pat<(v16i8 (concat_vectors 6032 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), 6033 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), 6034 (UZP2v16i8 V128:$Vn, V128:$Vm)>; 6035def : Pat<(v8i16 (concat_vectors 6036 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))), 6037 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), 6038 (UZP2v8i16 V128:$Vn, V128:$Vm)>; 6039def : Pat<(v4i32 (concat_vectors 6040 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), 6041 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), 6042 (UZP2v4i32 V128:$Vn, V128:$Vm)>; 6043 6044//---------------------------------------------------------------------------- 6045// AdvSIMD TBL/TBX instructions 6046//---------------------------------------------------------------------------- 6047 6048defm TBL : SIMDTableLookup< 0, "tbl">; 6049defm TBX : SIMDTableLookupTied<1, "tbx">; 6050 6051def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 6052 (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; 6053def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 6054 (TBLv16i8One V128:$Ri, V128:$Rn)>; 6055 6056def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), 6057 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 6058 (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; 6059def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), 6060 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 6061 (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; 6062 6063//---------------------------------------------------------------------------- 6064// AdvSIMD LUT instructions 6065//---------------------------------------------------------------------------- 6066let Predicates = [HasLUT] in { 6067 defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">; 6068 defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">; 6069} 6070 6071//---------------------------------------------------------------------------- 6072// AdvSIMD scalar DUP instruction 6073//---------------------------------------------------------------------------- 6074 6075defm DUP : SIMDScalarDUP<"mov">; 6076 6077//---------------------------------------------------------------------------- 6078// AdvSIMD scalar pairwise instructions 6079//---------------------------------------------------------------------------- 6080 6081defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; 6082defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; 6083defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; 6084defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; 6085defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; 6086defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; 6087 6088// Only the lower half of the result of the inner FADDP is used in the patterns 6089// below, so the second operand does not matter. Re-use the first input 6090// operand, so no additional dependencies need to be introduced. 
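// As an illustrative sketch (register choices here are an assumption, not
// taken from this file), the v4f32 case below lowers roughly to:
//   faddp v0.4s, v0.4s, v0.4s   // lanes become [a+b, c+d, a+b, c+d]
//   faddp s0, v0.2s             // s0 = (a+b) + (c+d)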
6091let Predicates = [HasFullFP16] in { 6092def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), 6093 (FADDPv2i16p 6094 (EXTRACT_SUBREG 6095 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn), 6096 dsub))>; 6097def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), 6098 (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>; 6099} 6100def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), 6101 (FADDPv2i32p 6102 (EXTRACT_SUBREG 6103 (FADDPv4f32 V128:$Rn, V128:$Rn), 6104 dsub))>; 6105def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), 6106 (FADDPv2i32p V64:$Rn)>; 6107def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), 6108 (FADDPv2i64p V128:$Rn)>; 6109 6110def : Pat<(v2i64 (AArch64saddv V128:$Rn)), 6111 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; 6112def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), 6113 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; 6114def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), 6115 (FADDPv2i32p V64:$Rn)>; 6116def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), 6117 (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; 6118def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), 6119 (FADDPv2i64p V128:$Rn)>; 6120def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))), 6121 (FMAXNMPv2i32p V64:$Rn)>; 6122def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))), 6123 (FMAXNMPv2i64p V128:$Rn)>; 6124def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))), 6125 (FMAXPv2i32p V64:$Rn)>; 6126def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))), 6127 (FMAXPv2i64p V128:$Rn)>; 6128def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))), 6129 (FMINNMPv2i32p V64:$Rn)>; 6130def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))), 6131 (FMINNMPv2i64p V128:$Rn)>; 6132def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))), 6133 (FMINPv2i32p V64:$Rn)>; 6134def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))), 6135 (FMINPv2i64p V128:$Rn)>; 6136 6137//---------------------------------------------------------------------------- 6138// AdvSIMD INS/DUP instructions 6139//---------------------------------------------------------------------------- 6140 6141def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; 6142def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; 6143def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; 6144def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; 6145def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; 6146def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; 6147def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; 6148 6149def DUPv2i64lane : SIMDDup64FromElement; 6150def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; 6151def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; 6152def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; 6153def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; 6154def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; 6155def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; 6156 6157// DUP from a 64-bit register to a 64-bit register is just a copy 6158def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))), 6159 (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>; 6160def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))), 6161 (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>; 6162 6163def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), 6164 (v2f32 (DUPv2i32lane 6165 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 6166 (i64 0)))>; 6167def : 
Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), 6168 (v4f32 (DUPv4i32lane 6169 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 6170 (i64 0)))>; 6171def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), 6172 (v2f64 (DUPv2i64lane 6173 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), 6174 (i64 0)))>; 6175def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))), 6176 (v4f16 (DUPv4i16lane 6177 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 6178 (i64 0)))>; 6179def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))), 6180 (v4bf16 (DUPv4i16lane 6181 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 6182 (i64 0)))>; 6183def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))), 6184 (v8f16 (DUPv8i16lane 6185 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 6186 (i64 0)))>; 6187def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))), 6188 (v8bf16 (DUPv8i16lane 6189 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 6190 (i64 0)))>; 6191 6192def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), 6193 (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; 6194def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), 6195 (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; 6196 6197def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), 6198 (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; 6199def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), 6200 (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; 6201 6202def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 6203 (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; 6204def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 6205 (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; 6206def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), 6207 (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; 6208 6209// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane 6210// instruction even if the types don't match: we just have to remap the lane 6211// carefully. N.b. this trick only applies to truncations. 
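// As a sketch of the remapping (assuming little-endian lane numbering):
// duplicating the i8 truncation of v8i16 lane 2 can use "dup v0.8b, v1.b[4]",
// since the low byte of halfword 2 is byte lane 4; hence the 2x/4x/8x index
// scaling in the SDNodeXForms below.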
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                        imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                        imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                         VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                         VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting an i8 or i16 element has its zero-extend transformed into an
// 'and' mask by type legalization, since neither i8 nor i16 is a legal type
// on AArch64. Match those masked patterns here, since UMOV already zeroes out
// the high bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                       VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                       VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr.
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME: refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension.
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                    (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                    VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                    V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2))>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                    (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                    VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                    V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2))>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                    (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                    VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                    V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2))>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                    (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                    VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                    V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2))>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                      (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                      imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                      (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                      imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16,  v4f16,  f16,  INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32,  v2f32,  f32,  INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64,  v1f64,  f64,  INSvi64lane>;

defm : Neon_INS_elt_pattern<v16i8, v8i8,  i32, INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;

// Insert from bitcast:
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (EXTRACT_SUBREG
            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
            dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
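
// A hedged example of the payoff (registers are illustrative): inserting an
// f32 held in s1 into lane 1 of a v4i32 in q0 becomes a single
//   mov v0.s[1], v1.s[0]
// instead of bouncing through a GPR ("fmov w8, s1; mov v0.s[1], w8").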

// Bitcast of an extract:
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, dsub)>;

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if the lane number is
// anything other than zero.
def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might just as
// well be INS.
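// For instance, (v4i32 (concat_vectors (v2i32 a), (v2i32 b))) is expected to
// become something like "mov v0.d[1], v1.d[0]" once both halves sit in the
// low 64 bits of 128-bit registers (a sketch; actual registers depend on
// allocation).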
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64,  v1i64>;
def : ConcatPat<v2f64,  v1f64>;
def : ConcatPat<v4i32,  v2i32>;
def : ConcatPat<v4f32,  v2f32>;
def : ConcatPat<v8i16,  v4i16>;
def : ConcatPat<v8f16,  v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8,  v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;

multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;

// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
            dsub))>;
def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i8 V64:$Rn))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$Rn), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v4i16 V64:$Rn))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv4i16v V64:$Rn), ssub))>;

def : Pat<(v4i32 (AArch64uaddlv (v16i8 V128:$Rn))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$Rn), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i16 V128:$Rn))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$Rn), ssub))>;

// Patterns for across-vector intrinsics that have a node equivalent returning
// a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the vector_extract.
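// e.g. (i32 (vector_extract (v4i32 (saddv ...)), (i64 0))) can be matched as
// ADDVv4i32v plus an ssub subregister copy, rather than a separate lane move
// (an illustrative pairing, not an additional pattern).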
6680def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), 6681 (i64 0)), (i64 0))), 6682 (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 6683 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), 6684 bsub), ssub)>; 6685def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), 6686 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6687 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), 6688 bsub), ssub)>; 6689def : Pat<(i32 (vector_extract (insert_subvector undef, 6690 (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))), 6691 (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 6692 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), 6693 hsub), ssub)>; 6694def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), 6695 (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 6696 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), 6697 hsub), ssub)>; 6698def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), 6699 (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 6700 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), 6701 ssub), ssub)>; 6702 6703} 6704 6705multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, 6706 SDPatternOperator opNode> 6707 : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { 6708// If there is a sign extension after this intrinsic, consume it as smov already 6709// performed it 6710def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, 6711 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)), 6712 (i32 (SMOVvi8to32 6713 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6714 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 6715 (i64 0)))>; 6716def : Pat<(i32 (sext_inreg (i32 (vector_extract 6717 (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), 6718 (i32 (SMOVvi8to32 6719 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6720 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 6721 (i64 0)))>; 6722def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, 6723 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)), 6724 (i32 (SMOVvi16to32 6725 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6726 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 6727 (i64 0)))>; 6728def : Pat<(i32 (sext_inreg (i32 (vector_extract 6729 (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), 6730 (i32 (SMOVvi16to32 6731 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6732 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 6733 (i64 0)))>; 6734} 6735 6736multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, 6737 SDPatternOperator opNode> 6738 : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { 6739// If there is a masking operation keeping only what has been actually 6740// generated, consume it. 
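// e.g. an (and ..., 0xff) applied to the i32 extracted from a v16i8 UMAXV
// result is redundant: the across-lanes instruction writes a B register, so
// the upper bits of the corresponding W register are already zero (a sketch
// of the rationale, not an extra pattern).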
6741def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, 6742 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)), 6743 (i32 (EXTRACT_SUBREG 6744 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6745 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 6746 ssub))>; 6747def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), 6748 maski8_or_more)), 6749 (i32 (EXTRACT_SUBREG 6750 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6751 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 6752 ssub))>; 6753def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, 6754 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)), 6755 (i32 (EXTRACT_SUBREG 6756 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6757 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 6758 ssub))>; 6759def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), 6760 maski16_or_more)), 6761 (i32 (EXTRACT_SUBREG 6762 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6763 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 6764 ssub))>; 6765} 6766 6767// For vecreduce_add, used by GlobalISel not SDAG 6768def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))), 6769 (i8 (ADDVv8i8v V64:$Rn))>; 6770def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))), 6771 (i8 (ADDVv16i8v V128:$Rn))>; 6772def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))), 6773 (i16 (ADDVv4i16v V64:$Rn))>; 6774def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))), 6775 (i16 (ADDVv8i16v V128:$Rn))>; 6776def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))), 6777 (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>; 6778def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))), 6779 (i32 (ADDVv4i32v V128:$Rn))>; 6780def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))), 6781 (i64 (ADDPv2i64p V128:$Rn))>; 6782 6783defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; 6784// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 6785def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), 6786 (ADDPv2i32 V64:$Rn, V64:$Rn)>; 6787 6788defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; 6789// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 6790def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), 6791 (ADDPv2i32 V64:$Rn, V64:$Rn)>; 6792 6793defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; 6794def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), 6795 (SMAXPv2i32 V64:$Rn, V64:$Rn)>; 6796 6797defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; 6798def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), 6799 (SMINPv2i32 V64:$Rn, V64:$Rn)>; 6800 6801defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; 6802def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), 6803 (UMAXPv2i32 V64:$Rn, V64:$Rn)>; 6804 6805defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; 6806def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), 6807 (UMINPv2i32 V64:$Rn, V64:$Rn)>; 6808 6809// For vecreduce_{opc} used by GlobalISel, not SDAG at the moment 6810// because GlobalISel allows us to specify the return register to be a FPR 6811multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc, 6812 SDPatternOperator opNode> { 6813def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))), 6814 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>; 6815 6816def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))), 6817 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>; 6818 6819def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))), 6820 (!cast<Instruction>(!strconcat(baseOpc, 
"v4i16v")) FPR64:$Rn)>; 6821 6822def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))), 6823 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>; 6824 6825def : Pat<(i32 (opNode (v4i32 V128:$Rn))), 6826 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>; 6827} 6828 6829// For v2i32 source type, the pairwise instruction can be used instead 6830defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>; 6831def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))), 6832 (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>; 6833 6834defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>; 6835def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))), 6836 (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>; 6837 6838defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>; 6839def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))), 6840 (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>; 6841 6842defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>; 6843def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))), 6844 (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>; 6845 6846multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { 6847 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 6848 (i32 (SMOVvi16to32 6849 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6850 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 6851 (i64 0)))>; 6852def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 6853 (i32 (SMOVvi16to32 6854 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6855 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 6856 (i64 0)))>; 6857 6858def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 6859 (i32 (EXTRACT_SUBREG 6860 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6861 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 6862 ssub))>; 6863def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 6864 (i32 (EXTRACT_SUBREG 6865 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6866 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 6867 ssub))>; 6868 6869def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 6870 (i64 (EXTRACT_SUBREG 6871 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6872 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 6873 dsub))>; 6874} 6875 6876multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, 6877 Intrinsic intOp> { 6878 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 6879 (i32 (EXTRACT_SUBREG 6880 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6881 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 6882 ssub))>; 6883def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 6884 (i32 (EXTRACT_SUBREG 6885 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6886 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 6887 ssub))>; 6888 6889def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 6890 (i32 (EXTRACT_SUBREG 6891 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6892 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 6893 ssub))>; 6894def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 6895 (i32 (EXTRACT_SUBREG 6896 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6897 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 6898 ssub))>; 6899 6900def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 6901 (i64 (EXTRACT_SUBREG 6902 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6903 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 6904 dsub))>; 6905} 6906 6907defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; 6908defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", 
int_aarch64_neon_uaddlv>; 6909 6910// The vaddlv_s32 intrinsic gets mapped to SADDLP. 6911def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), 6912 (i64 (EXTRACT_SUBREG 6913 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6914 (SADDLPv2i32_v1i64 V64:$Rn), dsub), 6915 dsub))>; 6916// The vaddlv_u32 intrinsic gets mapped to UADDLP. 6917def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), 6918 (i64 (EXTRACT_SUBREG 6919 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6920 (UADDLPv2i32_v1i64 V64:$Rn), dsub), 6921 dsub))>; 6922 6923//------------------------------------------------------------------------------ 6924// AdvSIMD modified immediate instructions 6925//------------------------------------------------------------------------------ 6926 6927// AdvSIMD BIC 6928defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; 6929// AdvSIMD ORR 6930defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; 6931 6932def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6933def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6934def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6935def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6936 6937def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6938def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6939def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6940def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6941 6942def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6943def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6944def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6945def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6946 6947def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6948def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6949def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6950def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6951 6952// AdvSIMD FMOV 6953def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, 6954 "fmov", ".2d", 6955 [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6956def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, 6957 "fmov", ".2s", 6958 [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6959def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, 6960 "fmov", ".4s", 6961 [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6962let Predicates = [HasNEON, HasFullFP16] in { 6963def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, 6964 "fmov", ".4h", 6965 [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6966def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, 6967 "fmov", ".8h", 6968 [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6969} // Predicates = [HasNEON, HasFullFP16] 6970 6971// AdvSIMD MOVI 6972 6973// EDIT byte mask: scalar 6974let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6975def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", 6976 [(set FPR64:$Rd, simdimmtype10:$imm8)]>; 6977// The movi_edit node has the immediate value already encoded, so we use 6978// a plain imm0_255 here. 
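// For reference (an illustrative decoding of the simdimmtype10 "byte mask"
// encoding): each bit of imm8 expands to a full byte, so imm8 = 0xff
// materializes all-ones, i.e. "movi d0, #0xffffffffffffffff".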
6979def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), 6980 (MOVID imm0_255:$shift)>; 6981 6982// EDIT byte mask: 2d 6983 6984// The movi_edit node has the immediate value already encoded, so we use 6985// a plain imm0_255 in the pattern 6986let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6987def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, 6988 simdimmtype10, 6989 "movi", ".2d", 6990 [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; 6991 6992def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6993def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6994def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6995def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6996def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6997def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6998def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6999def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 7000 7001def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7002def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7003def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7004def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; 7005 7006// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the 7007// extract is free and this gives better MachineCSE results. 7008def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 7009def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 7010def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 7011def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 7012def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>; 7013def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>; 7014def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>; 7015def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>; 7016 7017def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 7018def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 7019def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 7020def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 7021 7022// EDIT per word & halfword: 2s, 4h, 4s, & 8h 7023let isReMaterializable = 1, isAsCheapAsAMove = 1 in 7024defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; 7025 7026let Predicates = [HasNEON] in { 7027 // Using the MOVI to materialize fp constants. 
7028 def : Pat<(f32 fpimm32SIMDModImmType4:$in), 7029 (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in), 7030 (i32 24)), 7031 ssub)>; 7032} 7033 7034def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 7035def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 7036def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 7037def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 7038 7039def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 7040def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 7041def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 7042def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 7043 7044def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 7045 (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; 7046def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 7047 (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; 7048def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 7049 (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; 7050def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 7051 (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; 7052 7053let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 7054// EDIT per word: 2s & 4s with MSL shifter 7055def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", 7056 [(set (v2i32 V64:$Rd), 7057 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 7058def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", 7059 [(set (v4i32 V128:$Rd), 7060 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 7061 7062// Per byte: 8b & 16b 7063def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, 7064 "movi", ".8b", 7065 [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; 7066 7067def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, 7068 "movi", ".16b", 7069 [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; 7070} 7071 7072// AdvSIMD MVNI 7073 7074// EDIT per word & halfword: 2s, 4h, 4s, & 8h 7075let isReMaterializable = 1, isAsCheapAsAMove = 1 in 7076defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; 7077 7078def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 7079def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 7080def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 7081def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 7082 7083def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 7084def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 7085def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 7086def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 7087 7088def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 7089 (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; 7090def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 7091 (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; 7092def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 7093 (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; 7094def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 7095 (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; 7096 7097// 
EDIT per word: 2s & 4s with MSL shifter 7098let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 7099def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", 7100 [(set (v2i32 V64:$Rd), 7101 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 7102def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", 7103 [(set (v4i32 V128:$Rd), 7104 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 7105} 7106 7107//---------------------------------------------------------------------------- 7108// AdvSIMD indexed element 7109//---------------------------------------------------------------------------- 7110 7111let hasSideEffects = 0 in { 7112 defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; 7113 defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; 7114} 7115 7116// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the 7117// instruction expects the addend first, while the intrinsic expects it last. 7118 7119// On the other hand, there are quite a few valid combinatorial options due to 7120// the commutativity of multiplication and the fact that (-x) * y = x * (-y). 7121defm : SIMDFPIndexedTiedPatterns<"FMLA", 7122 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; 7123defm : SIMDFPIndexedTiedPatterns<"FMLA", 7124 TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; 7125 7126defm : SIMDFPIndexedTiedPatterns<"FMLS", 7127 TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 7128defm : SIMDFPIndexedTiedPatterns<"FMLS", 7129 TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; 7130defm : SIMDFPIndexedTiedPatterns<"FMLS", 7131 TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; 7132defm : SIMDFPIndexedTiedPatterns<"FMLS", 7133 TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; 7134 7135multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { 7136 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit 7137 // and DUP scalar. 7138 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 7139 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 7140 VectorIndexS:$idx))), 7141 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; 7142 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 7143 (v2f32 (AArch64duplane32 7144 (v4f32 (insert_subvector undef, 7145 (v2f32 (fneg V64:$Rm)), 7146 (i64 0))), 7147 VectorIndexS:$idx)))), 7148 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 7149 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 7150 VectorIndexS:$idx)>; 7151 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 7152 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 7153 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 7154 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 7155 7156 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit 7157 // and DUP scalar. 
7158 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 7159 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 7160 VectorIndexS:$idx))), 7161 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, 7162 VectorIndexS:$idx)>; 7163 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 7164 (v4f32 (AArch64duplane32 7165 (v4f32 (insert_subvector undef, 7166 (v2f32 (fneg V64:$Rm)), 7167 (i64 0))), 7168 VectorIndexS:$idx)))), 7169 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 7170 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 7171 VectorIndexS:$idx)>; 7172 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 7173 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 7174 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 7175 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 7176 7177 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar 7178 // (DUPLANE from 64-bit would be trivial). 7179 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 7180 (AArch64duplane64 (v2f64 (fneg V128:$Rm)), 7181 VectorIndexD:$idx))), 7182 (FMLSv2i64_indexed 7183 V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; 7184 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 7185 (AArch64dup (f64 (fneg FPR64Op:$Rm))))), 7186 (FMLSv2i64_indexed V128:$Rd, V128:$Rn, 7187 (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; 7188 7189 // 2 variants for 32-bit scalar version: extract from .2s or from .4s 7190 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 7191 (vector_extract (v4f32 (fneg V128:$Rm)), 7192 VectorIndexS:$idx))), 7193 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 7194 V128:$Rm, VectorIndexS:$idx)>; 7195 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 7196 (vector_extract (v4f32 (insert_subvector undef, 7197 (v2f32 (fneg V64:$Rm)), 7198 (i64 0))), 7199 VectorIndexS:$idx))), 7200 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 7201 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; 7202 7203 // 1 variant for 64-bit scalar version: extract from .1d or from .2d 7204 def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), 7205 (vector_extract (v2f64 (fneg V128:$Rm)), 7206 VectorIndexS:$idx))), 7207 (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, 7208 V128:$Rm, VectorIndexS:$idx)>; 7209} 7210 7211defm : FMLSIndexedAfterNegPatterns< 7212 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; 7213defm : FMLSIndexedAfterNegPatterns< 7214 TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >; 7215 7216defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; 7217defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>; 7218 7219def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 7220 (FMULv2i32_indexed V64:$Rn, 7221 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 7222 (i64 0))>; 7223def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 7224 (FMULv4i32_indexed V128:$Rn, 7225 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 7226 (i64 0))>; 7227def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), 7228 (FMULv2i64_indexed V128:$Rn, 7229 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), 7230 (i64 0))>; 7231 7232defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; 7233defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 7234 7235defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane, 7236 int_aarch64_neon_sqdmulh_laneq>; 7237defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane, 7238 int_aarch64_neon_sqrdmulh_laneq>; 7239 7240// Generated by 
MachineCombine 7241defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>; 7242defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>; 7243 7244defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; 7245defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", 7246 TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 7247defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", 7248 TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 7249defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>; 7250defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", 7251 int_aarch64_neon_sqadd>; 7252defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", 7253 int_aarch64_neon_sqsub>; 7254defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", 7255 int_aarch64_neon_sqrdmlah>; 7256defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", 7257 int_aarch64_neon_sqrdmlsh>; 7258defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; 7259defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", 7260 TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 7261defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", 7262 TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 7263defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>; 7264 7265// A scalar sqdmull with the second operand being a vector lane can be 7266// handled directly with the indexed instruction encoding. 7267def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 7268 (vector_extract (v4i32 V128:$Vm), 7269 VectorIndexS:$idx)), 7270 (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; 7271 7272//---------------------------------------------------------------------------- 7273// AdvSIMD scalar shift instructions 7274//---------------------------------------------------------------------------- 7275defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; 7276defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; 7277defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; 7278defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; 7279// Codegen patterns for the above. We don't put these directly on the 7280// instructions because TableGen's type inference can't handle the truth. 7281// Having the same base pattern for fp <--> int totally freaks it out. 
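// As a worked illustration of the fixed-point converts handled below (an
// editorial sketch, not taken from this file; register numbers are
// arbitrary): with #imm fractional bits,
//   fcvtzs s0, s1, #4    ; s0 = (i32) round-toward-zero(s1 * 2^4)
//   scvtf  s1, s0, #4    ; s1 = (f32) s0 / 2^4
// so the float 2.5 converts to the fixed-point integer 40 and back.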
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;

// Patterns for the FP16 intrinsics - a register copy to/from the FPR16 class
// is required, since i16 is not a legal type.

def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
            (and FPR32:$Rn, (i32 65535)),
            vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGE16
FPR16:$Rn, FPR16:$Rm), 7351 hsub))>; 7352def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), 7353 (i32 (INSERT_SUBREG 7354 (i32 (IMPLICIT_DEF)), 7355 (FACGT16 FPR16:$Rn, FPR16:$Rm), 7356 hsub))>; 7357 7358defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; 7359defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; 7360defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", 7361 int_aarch64_neon_sqrshrn>; 7362defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", 7363 int_aarch64_neon_sqrshrun>; 7364defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 7365defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 7366defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", 7367 int_aarch64_neon_sqshrn>; 7368defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", 7369 int_aarch64_neon_sqshrun>; 7370defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; 7371defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; 7372defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", 7373 TriOpFrag<(add node:$LHS, 7374 (AArch64srshri node:$MHS, node:$RHS))>>; 7375defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; 7376defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", 7377 TriOpFrag<(add_and_or_is_add node:$LHS, 7378 (AArch64vashr node:$MHS, node:$RHS))>>; 7379defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", 7380 int_aarch64_neon_uqrshrn>; 7381defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 7382defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", 7383 int_aarch64_neon_uqshrn>; 7384defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; 7385defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", 7386 TriOpFrag<(add node:$LHS, 7387 (AArch64urshri node:$MHS, node:$RHS))>>; 7388defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; 7389defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", 7390 TriOpFrag<(add_and_or_is_add node:$LHS, 7391 (AArch64vlshr node:$MHS, node:$RHS))>>; 7392 7393//---------------------------------------------------------------------------- 7394// AdvSIMD vector shift instructions 7395//---------------------------------------------------------------------------- 7396defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; 7397defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; 7398defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", 7399 int_aarch64_neon_vcvtfxs2fp>; 7400defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>; 7401defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; 7402 7403// X << 1 ==> X + X 7404class SHLToADDPat<ValueType ty, RegisterClass regtype> 7405 : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))), 7406 (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>; 7407 7408def : SHLToADDPat<v16i8, FPR128>; 7409def : SHLToADDPat<v8i16, FPR128>; 7410def : SHLToADDPat<v4i32, FPR128>; 7411def : SHLToADDPat<v2i64, FPR128>; 7412def : SHLToADDPat<v8i8, FPR64>; 7413def : SHLToADDPat<v4i16, FPR64>; 7414def : SHLToADDPat<v2i32, FPR64>; 7415 7416defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", 7417 BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; 7418defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>; 7419def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 7420 (i32 vecshiftL64:$imm))), 7421 (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; 7422defm SQRSHRN : 
SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", 7423 int_aarch64_neon_sqrshrn>; 7424defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", 7425 int_aarch64_neon_sqrshrun>; 7426defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 7427defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 7428defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", 7429 int_aarch64_neon_sqshrn>; 7430defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", 7431 int_aarch64_neon_sqshrun>; 7432defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>; 7433def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 7434 (i32 vecshiftR64:$imm))), 7435 (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; 7436defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; 7437defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", 7438 TriOpFrag<(add node:$LHS, 7439 (AArch64srshri node:$MHS, node:$RHS))> >; 7440defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", 7441 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; 7442 7443defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; 7444defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", 7445 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; 7446defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", 7447 int_aarch64_neon_vcvtfxu2fp>; 7448defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", 7449 int_aarch64_neon_uqrshrn>; 7450defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 7451defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", 7452 int_aarch64_neon_uqshrn>; 7453defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; 7454defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", 7455 TriOpFrag<(add node:$LHS, 7456 (AArch64urshri node:$MHS, node:$RHS))> >; 7457defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", 7458 BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; 7459defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; 7460defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", 7461 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; 7462 7463// RADDHN patterns for when RSHRN shifts by half the size of the vector element 7464def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))), 7465 (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; 7466def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))), 7467 (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; 7468let AddedComplexity = 5 in 7469def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))), 7470 (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; 7471def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), 7472 (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; 7473def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))), 7474 (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; 7475def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))), 7476 (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; 7477 7478// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element 7479def : Pat<(v16i8 (concat_vectors 7480 (v8i8 V64:$Vd), 7481 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift 
(i32 128), (i32 0))), (i32 8)))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;

// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
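// For example (illustrative only), the first pattern below selects
//   (v8i16 (sext (v8i8 V64:$Rn)))
// to "sshll v0.8h, v0.8b, #0", and the zext/anyext forms to
// "ushll v0.8h, v0.8b, #0": a shift-left-long by #0 is exactly a
// lane-widening extend.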
7545def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; 7546def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7547def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7548def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; 7549def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7550def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7551def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; 7552def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7553def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7554// Also match an extend from the upper half of a 128 bit source register. 7555def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7556 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7557def : Pat<(v8i16 (zext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7558 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7559def : Pat<(v8i16 (sext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), 7560 (SSHLLv16i8_shift V128:$Rn, (i32 0))>; 7561def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7562 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7563def : Pat<(v4i32 (zext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7564 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7565def : Pat<(v4i32 (sext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), 7566 (SSHLLv8i16_shift V128:$Rn, (i32 0))>; 7567def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7568 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7569def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7570 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7571def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), 7572 (SSHLLv4i32_shift V128:$Rn, (i32 0))>; 7573 7574// Vector shift sxtl aliases 7575def : InstAlias<"sxtl.8h $dst, $src1", 7576 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7577def : InstAlias<"sxtl $dst.8h, $src1.8b", 7578 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7579def : InstAlias<"sxtl.4s $dst, $src1", 7580 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7581def : InstAlias<"sxtl $dst.4s, $src1.4h", 7582 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7583def : InstAlias<"sxtl.2d $dst, $src1", 7584 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7585def : InstAlias<"sxtl $dst.2d, $src1.2s", 7586 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7587 7588// Vector shift sxtl2 aliases 7589def : InstAlias<"sxtl2.8h $dst, $src1", 7590 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7591def : InstAlias<"sxtl2 $dst.8h, $src1.16b", 7592 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7593def : InstAlias<"sxtl2.4s $dst, $src1", 7594 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7595def : InstAlias<"sxtl2 $dst.4s, $src1.8h", 7596 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7597def : InstAlias<"sxtl2.2d $dst, $src1", 7598 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7599def : InstAlias<"sxtl2 $dst.2d, $src1.4s", 7600 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7601 7602// Vector shift uxtl aliases 7603def : InstAlias<"uxtl.8h $dst, $src1", 7604 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7605def : InstAlias<"uxtl $dst.8h, $src1.8b", 7606 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7607def : InstAlias<"uxtl.4s $dst, $src1", 7608 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7609def : InstAlias<"uxtl $dst.4s, $src1.4h", 7610 (USHLLv4i16_shift V128:$dst, 
V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthening) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bits -> float. 2 sizes step-up.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (f64
                                (EXTRACT_SUBREG
                                  (SSHLLv8i8_shift
                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                        INST,
                                        bsub),
                                    0),
                                  dsub)),
                              0),
                            ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                  INST,
                                  hsub),
                                0),
                            ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bit to 32-bit conversions are handled in the target-specific dag combine:
// performIntToFpCombine.
// A 64-bit integer to 32-bit floating point conversion is not possible with
// SCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size steps.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                                (f64
                                 (EXTRACT_SUBREG
                                   (SSHLLv4i16_shift
                                     (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                       INST,
                                       hsub),
                                     0),
                                   dsub)),
                               0),
                             dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                  INST,
                                  ssub),
                               0),
                             dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;

// 64-bit -> double is handled in the target-specific dag combine:
// performIntToFpCombine.
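// As an editorial sketch of what SExtLoadi8CVTf32Pat above is expected to
// select for "(f32)(int8_t)mem[x0]" (register names are illustrative):
//   ldr   b0, [x0]           ; load the byte directly on the FP unit
//   sshll v0.8h, v0.8b, #0   ; sign extend: byte -> half
//   sshll v0.4s, v0.4h, #0   ; sign extend: half -> word
//   scvtf s0, s0             ; FPR -> FPR convert
// versus the default "ldrsb w8, [x0]; scvtf s0, w8", trading code size for
// latency as described above.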
7745 7746 7747//---------------------------------------------------------------------------- 7748// AdvSIMD Load-Store Structure 7749//---------------------------------------------------------------------------- 7750defm LD1 : SIMDLd1Multiple<"ld1">; 7751defm LD2 : SIMDLd2Multiple<"ld2">; 7752defm LD3 : SIMDLd3Multiple<"ld3">; 7753defm LD4 : SIMDLd4Multiple<"ld4">; 7754 7755defm ST1 : SIMDSt1Multiple<"st1">; 7756defm ST2 : SIMDSt2Multiple<"st2">; 7757defm ST3 : SIMDSt3Multiple<"st3">; 7758defm ST4 : SIMDSt4Multiple<"st4">; 7759 7760class Ld1Pat<ValueType ty, Instruction INST> 7761 : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; 7762 7763def : Ld1Pat<v16i8, LD1Onev16b>; 7764def : Ld1Pat<v8i16, LD1Onev8h>; 7765def : Ld1Pat<v4i32, LD1Onev4s>; 7766def : Ld1Pat<v2i64, LD1Onev2d>; 7767def : Ld1Pat<v8i8, LD1Onev8b>; 7768def : Ld1Pat<v4i16, LD1Onev4h>; 7769def : Ld1Pat<v2i32, LD1Onev2s>; 7770def : Ld1Pat<v1i64, LD1Onev1d>; 7771 7772class St1Pat<ValueType ty, Instruction INST> 7773 : Pat<(store ty:$Vt, GPR64sp:$Rn), 7774 (INST ty:$Vt, GPR64sp:$Rn)>; 7775 7776def : St1Pat<v16i8, ST1Onev16b>; 7777def : St1Pat<v8i16, ST1Onev8h>; 7778def : St1Pat<v4i32, ST1Onev4s>; 7779def : St1Pat<v2i64, ST1Onev2d>; 7780def : St1Pat<v8i8, ST1Onev8b>; 7781def : St1Pat<v4i16, ST1Onev4h>; 7782def : St1Pat<v2i32, ST1Onev2s>; 7783def : St1Pat<v1i64, ST1Onev1d>; 7784 7785//--- 7786// Single-element 7787//--- 7788 7789defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; 7790defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; 7791defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; 7792defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; 7793let mayLoad = 1, hasSideEffects = 0 in { 7794defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; 7795defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; 7796defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; 7797defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; 7798defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; 7799defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; 7800defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; 7801defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; 7802defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; 7803defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; 7804defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; 7805defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; 7806defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; 7807defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; 7808defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; 7809defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; 7810} 7811 7812def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7813 (LD1Rv8b GPR64sp:$Rn)>; 7814def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7815 (LD1Rv16b GPR64sp:$Rn)>; 7816def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7817 (LD1Rv4h GPR64sp:$Rn)>; 7818def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7819 (LD1Rv8h GPR64sp:$Rn)>; 7820def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7821 (LD1Rv2s GPR64sp:$Rn)>; 7822def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7823 (LD1Rv4s GPR64sp:$Rn)>; 
7824def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7825 (LD1Rv2d GPR64sp:$Rn)>; 7826def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7827 (LD1Rv1d GPR64sp:$Rn)>; 7828 7829def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7830 (LD1Rv8b GPR64sp:$Rn)>; 7831def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))), 7832 (LD1Rv16b GPR64sp:$Rn)>; 7833def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7834 (LD1Rv4h GPR64sp:$Rn)>; 7835def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))), 7836 (LD1Rv8h GPR64sp:$Rn)>; 7837def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7838 (LD1Rv2s GPR64sp:$Rn)>; 7839def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))), 7840 (LD1Rv4s GPR64sp:$Rn)>; 7841def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))), 7842 (LD1Rv2d GPR64sp:$Rn)>; 7843 7844// Grab the floating point version too 7845def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7846 (LD1Rv2s GPR64sp:$Rn)>; 7847def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7848 (LD1Rv4s GPR64sp:$Rn)>; 7849def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7850 (LD1Rv2d GPR64sp:$Rn)>; 7851def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7852 (LD1Rv1d GPR64sp:$Rn)>; 7853def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7854 (LD1Rv4h GPR64sp:$Rn)>; 7855def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7856 (LD1Rv8h GPR64sp:$Rn)>; 7857def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7858 (LD1Rv4h GPR64sp:$Rn)>; 7859def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7860 (LD1Rv8h GPR64sp:$Rn)>; 7861 7862class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, 7863 ValueType VTy, ValueType STy, Instruction LD1> 7864 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7865 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7866 (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; 7867 7868def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; 7869def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; 7870def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; 7871def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; 7872def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; 7873def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; 7874def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>; 7875def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>; 7876 7877// Generate LD1 for extload if memory type does not match the 7878// destination type, for example: 7879// 7880// (v4i32 (insert_vector_elt (load anyext from i8) idx)) 7881// 7882// In this case, the index must be adjusted to match LD1 type. 
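// For example (editorial illustration): an i8 extload inserted into lane 1
// of a v4i32 is selected to LD1i8, but lane 1 of a .4s vector is byte
// lane 4, so the S-type index must be multiplied by 4 (VectorIndexStoB
// below) to address the correct byte lane.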
7883// 7884class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand 7885 VecIndex, ValueType VTy, ValueType STy, 7886 Instruction LD1, SDNodeXForm IdxOp> 7887 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7888 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7889 (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; 7890 7891class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex, 7892 ValueType VTy, ValueType STy, Instruction LD1, 7893 SDNodeXForm IdxOp> 7894 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7895 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7896 (EXTRACT_SUBREG 7897 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7898 (IdxOp VecIndex:$idx), GPR64sp:$Rn), 7899 dsub)>; 7900 7901def VectorIndexStoH : SDNodeXForm<imm, [{ 7902 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7903}]>; 7904def VectorIndexStoB : SDNodeXForm<imm, [{ 7905 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); 7906}]>; 7907def VectorIndexHtoB : SDNodeXForm<imm, [{ 7908 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7909}]>; 7910 7911def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>; 7912def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>; 7913def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>; 7914 7915def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>; 7916def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>; 7917def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>; 7918 7919// Same as above, but the first element is populated using 7920// scalar_to_vector + insert_subvector instead of insert_vector_elt. 
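// Illustrative DAG shape for the class that follows (editorial sketch):
//   (v2i32 (scalar_to_vector (i32 (extloadi16 GPR64sp:$Rn))))
// is expected to select to "ld1 { v0.h }[0], [x0]" on the full 128-bit
// register, followed by an EXTRACT_SUBREG of dsub to produce the 64-bit
// result.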
7921let Predicates = [IsNeonAvailable] in { 7922 class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy, 7923 SDPatternOperator ExtLoad, Instruction LD1> 7924 : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), 7925 (ResultTy (EXTRACT_SUBREG 7926 (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; 7927 7928 def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>; 7929 def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>; 7930 def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>; 7931} 7932class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, 7933 ValueType VTy, ValueType STy, Instruction LD1> 7934 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7935 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7936 (EXTRACT_SUBREG 7937 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7938 VecIndex:$idx, GPR64sp:$Rn), 7939 dsub)>; 7940 7941def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; 7942def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; 7943def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; 7944def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; 7945def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>; 7946def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>; 7947 7948 7949defm LD1 : SIMDLdSt1SingleAliases<"ld1">; 7950defm LD2 : SIMDLdSt2SingleAliases<"ld2">; 7951defm LD3 : SIMDLdSt3SingleAliases<"ld3">; 7952defm LD4 : SIMDLdSt4SingleAliases<"ld4">; 7953 7954// Stores 7955defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; 7956defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; 7957defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; 7958defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; 7959 7960let AddedComplexity = 19 in 7961class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, 7962 ValueType VTy, ValueType STy, Instruction ST1> 7963 : Pat<(scalar_store 7964 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7965 GPR64sp:$Rn), 7966 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; 7967 7968def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>; 7969def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; 7970def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; 7971def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; 7972def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; 7973def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; 7974def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>; 7975def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>; 7976 7977let AddedComplexity = 19 in 7978class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7979 ValueType VTy, ValueType STy, Instruction ST1> 7980 : Pat<(scalar_store 7981 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7982 GPR64sp:$Rn), 7983 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7984 VecIndex:$idx, GPR64sp:$Rn)>; 7985 7986def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; 7987def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; 7988def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; 7989def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; 7990def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>; 7991def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>; 7992 7993multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7994 ValueType VTy, 
ValueType STy, Instruction ST1, 7995 int offset> { 7996 def : Pat<(scalar_store 7997 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7998 GPR64sp:$Rn, offset), 7999 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 8000 VecIndex:$idx, GPR64sp:$Rn, XZR)>; 8001 8002 def : Pat<(scalar_store 8003 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 8004 GPR64sp:$Rn, GPR64:$Rm), 8005 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 8006 VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 8007} 8008 8009defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; 8010defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, 8011 2>; 8012defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; 8013defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; 8014defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; 8015defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; 8016defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>; 8017defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>; 8018 8019multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, 8020 ValueType VTy, ValueType STy, Instruction ST1, 8021 int offset> { 8022 def : Pat<(scalar_store 8023 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 8024 GPR64sp:$Rn, offset), 8025 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; 8026 8027 def : Pat<(scalar_store 8028 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 8029 GPR64sp:$Rn, GPR64:$Rm), 8030 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 8031} 8032 8033defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, 8034 1>; 8035defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, 8036 2>; 8037defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; 8038defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; 8039defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; 8040defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; 8041defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>; 8042defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>; 8043 8044let mayStore = 1, hasSideEffects = 0 in { 8045defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; 8046defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; 8047defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; 8048defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; 8049defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; 8050defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; 8051defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; 8052defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; 8053defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; 8054defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; 8055defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; 8056defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; 8057} 8058 8059defm ST1 : SIMDLdSt1SingleAliases<"st1">; 8060defm ST2 : SIMDLdSt2SingleAliases<"st2">; 8061defm ST3 : SIMDLdSt3SingleAliases<"st3">; 8062defm ST4 : 
SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

let Predicates = [HasAES] in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                        Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                         Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired with
// AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
}

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
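// For example, (i64 (sext GPR32:$src)) becomes SBFMXri over the 64-bit
// super-register with immr = 0 and imms = 31, which disassembles as
// "sxtw x0, w0" (illustrative registers).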
8131def : Pat<(i64 (sext GPR32:$src)), 8132 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 8133def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; 8134def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; 8135def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; 8136def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; 8137def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; 8138def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; 8139def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; 8140 8141def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), 8142 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 8143 (i64 (i32shift_sext_i8 imm0_31:$imm)))>; 8144def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), 8145 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 8146 (i64 (i64shift_sext_i8 imm0_63:$imm)))>; 8147 8148def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), 8149 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 8150 (i64 (i32shift_sext_i16 imm0_31:$imm)))>; 8151def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), 8152 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 8153 (i64 (i64shift_sext_i16 imm0_63:$imm)))>; 8154 8155def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), 8156 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 8157 (i64 (i64shift_a imm0_63:$imm)), 8158 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 8159 8160def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), 8161 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 8162 (i64 (i64shift_a imm0_63:$imm)), 8163 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 8164 8165// sra patterns have an AddedComplexity of 10, so make sure we have a higher 8166// AddedComplexity for the following patterns since we want to match sext + sra 8167// patterns before we attempt to match a single sra node. 8168let AddedComplexity = 20 in { 8169// We support all sext + sra combinations which preserve at least one bit of the 8170// original value which is to be sign extended. E.g. we support shifts up to 8171// bitwidth-1 bits. 8172def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), 8173 (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; 8174def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), 8175 (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; 8176 8177def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), 8178 (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; 8179def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), 8180 (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; 8181 8182def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), 8183 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 8184 (i64 imm0_31:$imm), 31)>; 8185} // AddedComplexity = 20 8186 8187// To truncate, we can simply extract from a subregister. 8188def : Pat<(i32 (trunc GPR64sp:$src)), 8189 (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; 8190 8191// __builtin_trap() uses the BRK instruction on AArch64. 
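// For example (editorial note):
//   __builtin_trap()      -> brk #0x1
//   __builtin_debugtrap() -> brk #0xf000
// and for ubsantrap the 8-bit check kind is tagged with ('U' << 8) by the
// SDNodeXForm below, so kind 0 emits "brk #0x5500".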
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;

// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high part of
// both results together.
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//        store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = REV v2i32
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16
//   v5 = REV v4i16 v4               (implicit)
//        store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction.
This must be synthesized with an 8279// EXT instruction. 8280// 8281// Most bitconverts require some sort of conversion. The only exceptions are: 8282// a) Identity conversions - vNfX <-> vNiX 8283// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX 8284// 8285 8286// Natural vector casts (64 bit) 8287foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 8288 foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 8289 def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))), 8290 (VT FPR64:$src)>; 8291 8292// Natural vector casts (128 bit) 8293foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 8294 foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 8295 def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))), 8296 (VT FPR128:$src)>; 8297 8298let Predicates = [IsLE] in { 8299def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8300def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8301def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8302def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8303def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8304def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8305 8306def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 8307 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8308def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 8309 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8310def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 8311 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8312def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 8313 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8314def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 8315 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8316def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 8317 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8318def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 8319 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8320} 8321let Predicates = [IsBE] in { 8322def : Pat<(v8i8 (bitconvert GPR64:$Xn)), 8323 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8324def : Pat<(v4i16 (bitconvert GPR64:$Xn)), 8325 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8326def : Pat<(v2i32 (bitconvert GPR64:$Xn)), 8327 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8328def : Pat<(v4f16 (bitconvert GPR64:$Xn)), 8329 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8330def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), 8331 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8332def : Pat<(v2f32 (bitconvert GPR64:$Xn)), 8333 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8334 8335def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 8336 (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8337def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 8338 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8339def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 8340 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8341def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 8342 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8343def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 8344 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8345def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 8346 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8347} 8348def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8349def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8350def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), 8351 (COPY_TO_REGCLASS V64:$Vn, 
GPR64)>; 8352def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), 8353 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8354def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), 8355 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8356def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; 8357 8358def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), 8359 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; 8360def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), 8361 (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; 8362def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), 8363 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8364def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), 8365 (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; 8366def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 8367 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8368 8369def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>; 8370def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>; 8371 8372let Predicates = [IsLE] in { 8373def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; 8374def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; 8375def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; 8376def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; 8377def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>; 8378def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; 8379} 8380let Predicates = [IsBE] in { 8381def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), 8382 (v1i64 (REV64v2i32 FPR64:$src))>; 8383def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), 8384 (v1i64 (REV64v4i16 FPR64:$src))>; 8385def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), 8386 (v1i64 (REV64v8i8 FPR64:$src))>; 8387def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), 8388 (v1i64 (REV64v4i16 FPR64:$src))>; 8389def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), 8390 (v1i64 (REV64v4i16 FPR64:$src))>; 8391def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), 8392 (v1i64 (REV64v2i32 FPR64:$src))>; 8393} 8394def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; 8395def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; 8396 8397let Predicates = [IsLE] in { 8398def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; 8399def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; 8400def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; 8401def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; 8402def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; 8403def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; 8404def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>; 8405} 8406let Predicates = [IsBE] in { 8407def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), 8408 (v2i32 (REV64v2i32 FPR64:$src))>; 8409def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), 8410 (v2i32 (REV32v4i16 FPR64:$src))>; 8411def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), 8412 (v2i32 (REV32v8i8 FPR64:$src))>; 8413def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), 8414 (v2i32 (REV64v2i32 FPR64:$src))>; 8415def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), 8416 (v2i32 (REV64v2i32 FPR64:$src))>; 8417def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), 8418 (v2i32 (REV32v4i16 FPR64:$src))>; 8419def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), 8420 (v2i32 (REV32v4i16 FPR64:$src))>; 8421} 8422def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; 8423 8424let Predicates = [IsLE] in { 8425def : Pat<(v4i16 

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
          (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
          (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
          (v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
          (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
          (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
          (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                          (REV64v16i8 FPR128:$src), (i32 8)))>;
}
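
// Illustrative example (not part of the build): on a big-endian target,
//
//   %r = bitcast <4 x i32> %v to fp128
//
// cannot be a plain copy because the two 64-bit halves must also swap, so the
// f128 pattern above synthesizes it roughly as
//
//   rev64 v0.4s, v0.4s
//   ext   v0.16b, v0.16b, v0.16b, #8
//
// with register names indicative only.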

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
          (v2f64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
          (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
          (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
          (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
          (v2i64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
          (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
          (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
          (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
          (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
          (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;

def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
          (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
          (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;

def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
          (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                            (REV64v8i16 FPR128:$src),
                            (i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
          (v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
          (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
}

def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
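
// Illustrative example (not part of the build): extracting the high half,
// e.g.
//
//   %hi = shufflevector <16 x i8> %v, <16 x i8> poison,
//                       <8 x i32> <i32 8, i32 9, ..., i32 15>
//
// matches the patterns above and becomes roughly "dup v0.2d, v0.d[1]"
// followed by a dsub subregister read, rather than a more general EXT-based
// sequence.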

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Prefer using the bottom lanes of addp Rn, Rn compared to
// addp extractlow(Rn), extracthigh(Rn)
def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;

def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
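
// Illustrative example (not part of the build): a horizontal add of the two
// low float lanes,
//
//   %a = extractelement <4 x float> %v, i64 0
//   %b = extractelement <4 x float> %v, i64 1
//   %s = fadd float %a, %b
//
// is selected via the patterns above to a single "faddp s0, v0.2s" instead
// of two lane moves plus a scalar fadd.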

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]
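
// Illustrative example (not part of the build): a nontemporal store such as
//
//   store <2 x i64> %v, ptr %p, !nontemporal !0
//
// has no single-register STNP form, so NTStore128Pat above splits it roughly
// into
//
//   mov  d1, v0.d[1]
//   stnp d0, d1, [x0]
//
// with register names indicative only.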

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call limited to only use registers (x16 and x17) which are
  // allowed to tail-call a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy
// propagation, to reason about, so is preferred when it's possible to use it.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}
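
// Illustrative example (not part of the build):
//
//   %x = extractelement <2 x i64> %v, i64 0
//
// is selected as a dsub subregister copy, which (if it is not folded away
// entirely) materializes as "fmov x0, d0" rather than "umov x0, v0.d[0]".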

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
                (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
                     (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                          (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;

// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;

// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
               SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;

class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;

class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;

class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}
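
// Illustrative example (not part of the build): with +dotprod, an 8-way byte
// multiply-accumulate reduction (zero-extend eight u8 lanes of each operand,
// multiply pairwise, sum everything into an i32) is matched by idot_v8i8
// above and selected roughly as
//
//   dup  v2.2s, wzr
//   udot v2.2s, v0.8b, v1.8b      // or sdot for the signed form
//   addp v2.2s, v2.2s, v2.2s
//   fmov w0, s2
//
// replacing the widening-multiply tree it was written as.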

// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}

let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV: Store64BV<0b011, "st64bv">;
  def ST64BV0: Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}

let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;

    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;

    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;

    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
  }
}
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}

// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;

// MOPS operations always contain three 4-byte instructions
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                     (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                     [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
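
// Illustrative example (not part of the build): a memcpy lowered through
// AArch64mops_memcopy expands, after pseudo expansion, into the architected
// prologue/main/epilogue triple
//
//   cpyfp [x0]!, [x1]!, x2!
//   cpyfm [x0]!, [x1]!, x2!
//   cpyfe [x0]!, [x1]!, x2!
//
// which is why the pseudos above are marked Size = 12.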

//-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns

let Predicates = [HasPAuth] in {
def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
          (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;
def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
}
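
// Illustrative example (not part of the build): blending a 16-bit constant
// discriminator,
//
//   %d = call i64 @llvm.ptrauth.blend(i64 %addr, i64 1234)
//
// is selected by the first pattern above to a single
//
//   movk x0, #1234, lsl #48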

//-----------------------------------------------------------------------------

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
    : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
             []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb", (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb", (HINT 22), 1>;
}

//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW : ReadCheckWriteCompareAndSwap;

defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
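
// Illustrative example (not part of the build): with +cssc, a scalar
//
//   %m = call i32 @llvm.smax.i32(i32 %a, i32 %b)
//
// selects directly to "smax w0, w0, w1" via the ComparisonOp definitions
// above, instead of a cmp/csel pair.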

def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg): when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                             size   opc    opc2
  def STILPWpre:  BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:  BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:    BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:    BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  //                                        size   opc
  def STLRWpre:  BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:  BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
  def LDAPRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}

let Predicates = [HasRCPC3, HasNEON] in {
  //                                              size   opc regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8,   (outs), (ins FPR8:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16,  (outs), (ins FPR16:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32,  (outs), (ins FPR32:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64,  (outs), (ins FPR64:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8,   (outs FPR8:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16,  (outs FPR16:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32,  (outs FPR32:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64,  (outs FPR64:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  //                              L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?)
    // and therefore autogenerates a decoder that builds an MC representation that has 4 fields
    // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one
    // extra for the XZR) because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc
    // is based off of the asm template (maybe) and therefore wants to print 5 operands.
    // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would
    // overlap with the main SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
//   MRRS 1101010101|  1| 1|o0|  op1|   Cn|  Cm|op2| Rt
//   MSRR 1101010101|  0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
//   MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
//   MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc).
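
// Illustrative usage (not part of the build), with the even/odd register pair
// required above; TTBR0_EL1 stands in for any 128-bit-capable system
// register:
//
//   mrrs x0, x1, TTBR0_EL1    // read the 128-bit register into x0:x1
//   msrr TTBR0_EL1, x0, x1    // write x0:x1 back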

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}

//===----------------------------===//
// 2023 Architecture Extensions:
//===----------------------------===//

let Predicates = [HasFP8] in {
  defm F1CVTL  : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
  defm F2CVTL  : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
  defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
  defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
  defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
  defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
  defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]

let Predicates = [HasFAMINMAX] in {
  defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
  defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
} // End let Predicates = [HasFAMINMAX]

let Predicates = [HasFP8FMA] in {
  defm FMLALBlane   : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
  defm FMLALTlane   : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
  defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
  defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
  defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
  defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;

  defm FMLALB   : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
  defm FMLALT   : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
  defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
  defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
  defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
  defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
} // End let Predicates = [HasFP8FMA]

let Predicates = [HasFP8DOT2] in {
  defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
  defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
} // End let Predicates = [HasFP8DOT2]

let Predicates = [HasFP8DOT4] in {
  defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
  defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
} // End let Predicates = [HasFP8DOT4]

//===----------------------------------------------------------------------===//
// Checked Pointer Arithmetic (FEAT_CPA)
//===----------------------------------------------------------------------===//
let Predicates = [HasCPA] in {
  // Scalar add/subtract
  defm ADDPT : AddSubCPA<0, "addpt">;
  defm SUBPT : AddSubCPA<1, "subpt">;

  // Scalar multiply-add/subtract
  def MADDPT : MulAccumCPA<0, "maddpt">;
  def MSUBPT : MulAccumCPA<1, "msubpt">;
}

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"