//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
               AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
                 AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3 : Predicate<"Subtarget->hasEL3()">,
             AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH : Predicate<"Subtarget->hasVH()">,
            AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR : Predicate<"Subtarget->hasLOR()">,
             AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
               AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasJS : Predicate<"Subtarget->hasJS()">,
            AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
               AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
                   AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV : Predicate<"Subtarget->hasNV()">,
            AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
              AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT : Predicate<"Subtarget->hasDIT()">,
             AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
                   AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM : Predicate<"Subtarget->hasAM()">,
            AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
              AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
                 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
               AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
                   AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
                 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
              AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
             AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
             AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
             AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
              AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
             AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
             AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
             AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
                  AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
             AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                           "fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
             AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
              AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->hasSVE2p1()">,
                AssemblerPredicate<(any_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
                 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
                 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4),
"sve2-sm4">; 145def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, 146 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">; 147def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, 148 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; 149def HasB16B16 : Predicate<"Subtarget->hasB16B16()">, 150 AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">; 151def HasSME : Predicate<"Subtarget->hasSME()">, 152 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; 153def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">, 154 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">; 155def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">, 156 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">; 157def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">, 158 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">; 159def HasSME2 : Predicate<"Subtarget->hasSME2()">, 160 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">; 161def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">, 162 AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">; 163 164// A subset of SVE(2) instructions are legal in Streaming SVE execution mode, 165// they should be enabled if either has been specified. 166def HasSVEorSME 167 : Predicate<"Subtarget->hasSVEorSME()">, 168 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), 169 "sve or sme">; 170def HasSVE2orSME 171 : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">, 172 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME), 173 "sve2 or sme">; 174def HasSVE2p1_or_HasSME 175 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">, 176 AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; 177def HasSVE2p1_or_HasSME2 178 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">, 179 AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">; 180def HasSVE2p1_or_HasSME2p1 181 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">, 182 AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">; 183// A subset of NEON instructions are legal in Streaming SVE execution mode, 184// they should be enabled if either has been specified. 
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                "neon or sme">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
              AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
                 AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
                   AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
            AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
                 AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
              AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
             AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
             AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
             AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
             AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
              AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
            AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
              AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
              AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
                 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
             AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
              AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
                AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
                  AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE : Predicate<"Subtarget->hasITE()">,
             AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE : Predicate<"Subtarget->hasTHE()">,
             AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
               AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
                AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
              AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK : Predicate<"Subtarget->hasCHK()">,
             AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS : Predicate<"Subtarget->hasGCS()">,
             AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
                                    [SDTCisFP<0>,
                                     SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                   SDTCisSameAs<0,1>,
                                                   SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1

// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// number of operands (the variable)
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
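
// A usage sketch for the fragments above (illustrative, not an exhaustive
// list of users): azext_masked_load_i8 matches an unindexed masked load whose
// memory element type is i8 and whose extension kind is zero- or any-extend,
// so an SVE pattern along the lines of
//   (nxv8i16 (azext_masked_load_i8 node:$ptr, node:$pred, undef))
// can be selected to an extending LD1B form, while sext_masked_load_i8
// selects the sign-extending variant.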

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}
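
// A sketch of the expansion, for reference: each defm below instantiates the
// four fragments defined by the multiclass via the NAME# concatenation, e.g.
// "defm nonext_masked_gather" produces nonext_masked_gather_signed_scaled,
// nonext_masked_gather_signed_unscaled, nonext_masked_gather_unsigned_scaled
// and nonext_masked_gather_unsigned_unscaled, one per addressing mode.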

defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
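
// A worked example for the PatLeafs above (illustrative only): a value
// produced by (zext (i1 ...)) or by a boolean SETCC is known to be 0 or 1,
// so MaskedValueIsZero proves its top 31 (resp. 63) bits are zero and
// topbitsallzero32/64 match it; patterns can then treat the value as a
// boolean directly without inserting an extra 'and' with #1.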
SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, 620 [SDNPCommutative]>; 621def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; 622def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, 623 [SDNPCommutative]>; 624def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; 625def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; 626 627def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; 628def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; 629def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; 630 631def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; 632 633def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; 634def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, 635 [SDNPHasChain]>; 636def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, 637 [SDNPHasChain]>; 638def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), 639 [(AArch64strict_fcmp node:$lhs, node:$rhs), 640 (AArch64fcmp node:$lhs, node:$rhs)]>; 641 642def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; 643def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; 644def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; 645def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; 646def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; 647def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; 648 649def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; 650 651def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; 652def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; 653def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; 654def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; 655def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; 656def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; 657 658def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; 659def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; 660def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; 661def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; 662def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; 663def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; 664def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; 665 666def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; 667def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; 668def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; 669def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; 670 671def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; 672def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; 673def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; 674def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; 675def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; 676def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; 677def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; 678def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; 679def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; 
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisSameAs<1, 2>]>;
def AArch64pmull : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;

def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
                           [(abdu node:$lhs, node:$rhs),
                            (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
                           [(abds node:$lhs, node:$rhs),
                            (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm),
                           [(AArch64addp_n node:$Rn, node:$Rm),
                            (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp : PatFrags<(ops node:$src),
                             [(AArch64uaddlp_n node:$src),
                              (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp : PatFrags<(ops node:$src),
                             [(AArch64saddlp_n node:$src),
                              (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64addp_n node:$Rn, node:$Rm),
                             (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmax node:$Rn),
                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmin node:$Rn),
                               (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fmaximum node:$Rn),
                             (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fminimum node:$Rn),
                             (int_aarch64_neon_fminv node:$Rn)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

// Match add node and also treat an 'or' node as an 'add' if the or'ed operands
// have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
                                 [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
  if (N->getOpcode() == ISD::ADD)
    return true;
  return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
    // Only handle G_ADD for now. FIXME. build capability to compute whether
    // operands of G_OR have common bits set or not.
    return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}

// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
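
// Worked examples for the two fragments above (illustrative only):
// - add_and_or_is_add: in (or (shl x, 8), (and y, 255)) the operands share no
//   set bits, so the 'or' computes the same value as an 'add' and add-based
//   patterns may match it.
// - smullwithsignbits: in (mul (sext i32 a), (sext i32 b)) both operands have
//   at least 33 sign bits, so the product can be formed with a 32x32->64
//   SMULL rather than a full 64-bit multiply.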

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to an empty list because we expect these instructions to simply
// get removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                       Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                     Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                                    tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                                    tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                                    tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                                    tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                                    tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                    texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                                tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly
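
// A sketch of the expected expansion (assuming the small code model): the
// MOVaddr* pseudos above become the two-instruction sequence
//   adrp x0, sym
//   add  x0, x0, :lo12:sym
// hence the WriteAdrAdr scheduling class.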

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// In general these get lowered into a sequence of three 4-byte instructions.
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;

let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
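
// Illustrative note on the barrier instructions below: the CRm immediate
// encodes the barrier option (e.g. "dmb ish" is CRm = 0b1011), and the
// patterns lower the int_aarch64_dmb/dsb/isb intrinsic argument directly
// into that field.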
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
                     [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
                     [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB : CRmSystemI<barrier_op, 0b110, "isb",
                     [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm = 0b0010;
  let Inst{12} = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0} = 0b11;
  let Inst{9-8} = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

}

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;


// ARMv9.4-A Guarded Control Stack
class GCSNoOp<bits<3> op2, string mnemonic>
    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
  let Inst{20-8} = 0b0100001110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}
def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;

class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
              list<dag> pattern = []>
    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;

class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
               list<dag> pattern = []>
    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
  let Inst{20-19} = 0b01;
  let Inst{18-16} = op1;
  let Inst{15-8} = 0b01110111;
  let Inst{7-5} = op2;
  let Predicates = [HasGCS];
}

def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent

def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;

def : TokenAlias<"DSYNC", "dsync">;

let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
  def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">;
}
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;

class GCSSt<string mnemonic, bits<3> op>
    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
  bits<5> Rt;
  bits<5> Rn;
  let Inst{31-15} = 0b11011001000111110;
  let Inst{14-12} = op;
  let Inst{11-10} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{4-0} = Rt;
  let Predicates = [HasGCS];
}
def GCSSTR  : GCSSt<"gcsstr", 0b000>;
def GCSSTTR : GCSSt<"gcssttr", 0b001>;


// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN       : SIMD_BFCVTN;
def BFCVTN2      : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                            (v4bf16 V64:$Rm),
                            (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorSME, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
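
// Illustrative note (a sketch of the FEAT_I8MM semantics): SMMLA/UMMLA/USMMLA
// treat their two 128-bit sources as a 2x8 and an 8x2 matrix of i8 elements
// (signed, unsigned, or mixed unsigned-by-signed for USMMLA) and accumulate
// the 2x2 i32 product into the destination.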

// sudot lane has a pattern where usdot is expected (there is no sudot).
// The second operand is used in the dup operation to repeat the indexed
// element.
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
                         string rhs_kind, RegisterOperand RegType,
                         ValueType AccumType, ValueType InputType>
    : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
                                      lhs_kind, rhs_kind, RegType, AccumType,
                                      InputType, null_frag> {
  let Pattern = [(set (AccumType RegType:$dst),
                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
                                 (InputType (bitconvert (AccumType
                                    (AArch64duplane32 (v4i32 V128:$Rm),
                                        VectorIndexS:$idx)))),
                                 (InputType RegType:$Rn))))];
}

multiclass SIMDSUDOTIndex {
  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
}

defm SUDOTlane : SIMDSUDOTIndex;

}

// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}

// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
def XAR       : CryptoRRRi6<"xar">;

class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
    : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
          (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;

class EOR3_pattern<ValueType VecTy>
    : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
          (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;

class BCAX_pattern<ValueType VecTy>
    : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
          (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : BCAX_pattern<v16i8>;
1272def : BCAX_pattern<v8i16>; 1273def : BCAX_pattern<v4i32>; 1274def : BCAX_pattern<v2i64>; 1275 1276def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>; 1277def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>; 1278def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>; 1279def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>; 1280 1281def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>; 1282def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>; 1283def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>; 1284def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>; 1285 1286def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>; 1287def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>; 1288def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>; 1289def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>; 1290 1291def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), 1292 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1293 1294def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))), 1295 (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>; 1296 1297def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))), 1298 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1299 1300} // HasSHA3 1301 1302let Predicates = [HasSM4] in { 1303def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; 1304def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">; 1305def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; 1306def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; 1307def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; 1308def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; 1309def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; 1310def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; 1311def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; 1312 1313def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))), 1314 (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>; 1315 1316class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode> 1317 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1318 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1319 1320class SM3TT_pattern<Instruction INST, Intrinsic OpNode> 1321 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )), 1322 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; 1323 1324class SM4_pattern<Instruction INST, Intrinsic OpNode> 1325 : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1326 (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1327 1328def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>; 1329def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>; 1330 1331def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>; 1332def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>; 1333def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>; 1334def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>; 1335 1336def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>; 1337def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>; 1338} // HasSM4 1339 1340let Predicates = [HasRCPC] in { 1341 // v8.3 Release Consistent Processor Consistent support, optional in v8.2. 
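// LDAPR* are load-acquire variants with RCpc ("processor consistent")
// ordering, which relaxes the RCsc guarantee of LDAR between a store-release
// and a later load-acquire to a different address; that weaker ordering is
// still sufficient for C/C++ memory_order_acquire. A sketch of the expected
// selection when +rcpc is available:
//
//   ldapr  w0, [x8]    // acquire load that otherwise needs ldar w0, [x8]
//   ldaprb w0, [x8]    // byte form; ldaprh covers the halfword case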
1342 def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; 1343 def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; 1344 def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; 1345 def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; 1346} 1347 1348// v8.3a complex add and multiply-accumulate. No predicate here, that is done 1349// inside the multiclass as the FP16 versions need different predicates. 1350defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, 1351 "fcmla", null_frag>; 1352defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, 1353 "fcadd", null_frag>; 1354defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">; 1355 1356let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1357 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1358 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; 1359 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1360 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; 1361 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1362 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; 1363 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1364 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; 1365} 1366 1367let Predicates = [HasComplxNum, HasNEON] in { 1368 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1369 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; 1370 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1371 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; 1372 foreach Ty = [v4f32, v2f64] in { 1373 def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), 1374 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; 1375 def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), 1376 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; 1377 } 1378} 1379 1380multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> { 1381 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1382 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; 1383 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1384 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; 1385 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1386 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; 1387 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1388 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; 1389} 1390 1391multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> { 1392 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1393 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; 1394 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1395 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; 1396 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1397 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; 1398 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1399 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; 1400} 1401 1402 
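// As a sketch of why each rotation exists (operand roles as in the Arm ARM
// description, so treat the exact real/imaginary assignment as indicative):
// a full complex multiply-accumulate d += n * m is conventionally issued as
// a rot0/rot90 pair, e.g.
//
//   fcmla v0.4s, v1.4s, v2.4s, #0    // d.re += n.re*m.re ; d.im += n.re*m.im
//   fcmla v0.4s, v1.4s, v2.4s, #90   // d.re -= n.im*m.im ; d.im += n.im*m.re
//
// The patterns below simply map the four vcmla_rot* intrinsics onto the
// rotation immediates 0-3.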
1403let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1404 defm : FCMLA_PATS<v4f16, V64>; 1405 defm : FCMLA_PATS<v8f16, V128>; 1406 1407 defm : FCMLA_LANE_PATS<v4f16, V64, 1408 (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; 1409 defm : FCMLA_LANE_PATS<v8f16, V128, 1410 (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; 1411} 1412let Predicates = [HasComplxNum, HasNEON] in { 1413 defm : FCMLA_PATS<v2f32, V64>; 1414 defm : FCMLA_PATS<v4f32, V128>; 1415 defm : FCMLA_PATS<v2f64, V128>; 1416 1417 defm : FCMLA_LANE_PATS<v4f32, V128, 1418 (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; 1419} 1420 1421// v8.3a Pointer Authentication 1422// These instructions inhabit part of the hint space and so can be used for 1423// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is 1424// important for compatibility with other assemblers (e.g. GAS) when building 1425// software compatible with both CPUs that do or don't implement PA. 1426let Uses = [LR], Defs = [LR] in { 1427 def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; 1428 def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; 1429 let isAuthenticated = 1 in { 1430 def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; 1431 def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; 1432 } 1433} 1434let Uses = [LR, SP], Defs = [LR] in { 1435 def PACIASP : SystemNoOperands<0b001, "hint\t#25">; 1436 def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; 1437 let isAuthenticated = 1 in { 1438 def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; 1439 def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; 1440 } 1441} 1442let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { 1443 def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; 1444 def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; 1445 let isAuthenticated = 1 in { 1446 def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; 1447 def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; 1448 } 1449} 1450 1451let Uses = [LR], Defs = [LR], CRm = 0b0000 in { 1452 def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; 1453} 1454 1455// In order to be able to write readable assembly, LLVM should accept assembly 1456// inputs that use pointer authentication mnemonics, even with PA disabled. 1457// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1458// should not emit these mnemonics unless PA is enabled. 1459def : InstAlias<"paciaz", (PACIAZ), 0>; 1460def : InstAlias<"pacibz", (PACIBZ), 0>; 1461def : InstAlias<"autiaz", (AUTIAZ), 0>; 1462def : InstAlias<"autibz", (AUTIBZ), 0>; 1463def : InstAlias<"paciasp", (PACIASP), 0>; 1464def : InstAlias<"pacibsp", (PACIBSP), 0>; 1465def : InstAlias<"autiasp", (AUTIASP), 0>; 1466def : InstAlias<"autibsp", (AUTIBSP), 0>; 1467def : InstAlias<"pacia1716", (PACIA1716), 0>; 1468def : InstAlias<"pacib1716", (PACIB1716), 0>; 1469def : InstAlias<"autia1716", (AUTIA1716), 0>; 1470def : InstAlias<"autib1716", (AUTIB1716), 0>; 1471def : InstAlias<"xpaclri", (XPACLRI), 0>; 1472 1473// These pointer authentication instructions require armv8.3a 1474let Predicates = [HasPAuth] in { 1475 1476 // When PA is enabled, a better mnemonic should be emitted. 
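// The trailing integer on each InstAlias is its emit priority: the
// priority-0 aliases above are parse-only, while these priority-1 forms also
// win when the instruction is printed. The net effect for, e.g., PACIASP:
//
//   without FeaturePAuth: "paciasp" is accepted but prints as "hint #25"
//   with FeaturePAuth:    "paciasp" is accepted and printed as-is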
1477 def : InstAlias<"paciaz", (PACIAZ), 1>; 1478 def : InstAlias<"pacibz", (PACIBZ), 1>; 1479 def : InstAlias<"autiaz", (AUTIAZ), 1>; 1480 def : InstAlias<"autibz", (AUTIBZ), 1>; 1481 def : InstAlias<"paciasp", (PACIASP), 1>; 1482 def : InstAlias<"pacibsp", (PACIBSP), 1>; 1483 def : InstAlias<"autiasp", (AUTIASP), 1>; 1484 def : InstAlias<"autibsp", (AUTIBSP), 1>; 1485 def : InstAlias<"pacia1716", (PACIA1716), 1>; 1486 def : InstAlias<"pacib1716", (PACIB1716), 1>; 1487 def : InstAlias<"autia1716", (AUTIA1716), 1>; 1488 def : InstAlias<"autib1716", (AUTIB1716), 1>; 1489 def : InstAlias<"xpaclri", (XPACLRI), 1>; 1490 1491 multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm, 1492 SDPatternOperator op> { 1493 def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>; 1494 def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>; 1495 def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>; 1496 def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>; 1497 def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>; 1498 def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>; 1499 def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>; 1500 def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>; 1501 } 1502 1503 defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; 1504 defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; 1505 1506 def XPACI : ClearAuth<0, "xpaci">; 1507 def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>; 1508 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>; 1509 1510 def XPACD : ClearAuth<1, "xpacd">; 1511 def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>; 1512 def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>; 1513 1514 def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; 1515 1516 // Combined Instructions 1517 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1518 def BRAA : AuthBranchTwoOperands<0, 0, "braa">; 1519 def BRAB : AuthBranchTwoOperands<0, 1, "brab">; 1520 } 1521 let isCall = 1, Defs = [LR], Uses = [SP] in { 1522 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; 1523 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; 1524 } 1525 1526 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1527 def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; 1528 def BRABZ : AuthOneOperand<0b000, 1, "brabz">; 1529 } 1530 let isCall = 1, Defs = [LR], Uses = [SP] in { 1531 def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; 1532 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; 1533 } 1534 1535 let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1536 def RETAA : AuthReturn<0b010, 0, "retaa">; 1537 def RETAB : AuthReturn<0b010, 1, "retab">; 1538 def ERETAA : AuthReturn<0b100, 0, "eretaa">; 1539 def ERETAB : AuthReturn<0b100, 1, "eretab">; 1540 } 1541 1542 defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; 1543 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; 1544 1545} 1546 1547// v8.3a floating point conversion for javascript 1548let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in 1549def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, 1550 "fjcvtzs", 1551 [(set GPR32:$Rd, 1552 (int_aarch64_fjcvtzs FPR64:$Rn))]> { 1553 let Inst{31} = 0; 1554} // HasJS, HasFPARMv8 1555 1556// v8.4 Flag manipulation instructions 1557let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { 1558def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> 
{ 1559 let Inst{20-5} = 0b0000001000000000; 1560} 1561def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; 1562def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; 1563def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", 1564 "{\t$Rn, $imm, $mask}">; 1565} // HasFlagM 1566 1567// v8.5 flag manipulation instructions 1568let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { 1569 1570def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { 1571 let Inst{18-16} = 0b000; 1572 let Inst{11-8} = 0b0000; 1573 let Unpredictable{11-8} = 0b1111; 1574 let Inst{7-5} = 0b001; 1575} 1576 1577def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { 1578 let Inst{18-16} = 0b000; 1579 let Inst{11-8} = 0b0000; 1580 let Unpredictable{11-8} = 0b1111; 1581 let Inst{7-5} = 0b010; 1582} 1583} // HasAltNZCV 1584 1585 1586// Armv8.5-A speculation barrier 1587def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { 1588 let Inst{20-5} = 0b0001100110000111; 1589 let Unpredictable{11-8} = 0b1111; 1590 let Predicates = [HasSB]; 1591 let hasSideEffects = 1; 1592} 1593 1594def : InstAlias<"clrex", (CLREX 0xf)>; 1595def : InstAlias<"isb", (ISB 0xf)>; 1596def : InstAlias<"ssbb", (DSB 0)>; 1597def : InstAlias<"pssbb", (DSB 4)>; 1598def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; 1599 1600def MRS : MRSI; 1601def MSR : MSRI; 1602def MSRpstateImm1 : MSRpstateImm0_1; 1603def MSRpstateImm4 : MSRpstateImm0_15; 1604 1605def : Pat<(AArch64mrs imm:$id), 1606 (MRS imm:$id)>; 1607 1608// The thread pointer (on Linux, at least, where this has been implemented) is 1609// TPIDR_EL0. 1610def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), 1611 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; 1612 1613// This gets lowered into a 24-byte instruction sequence 1614let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { 1615def KCFI_CHECK : Pseudo< 1616 (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>; 1617} 1618 1619let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { 1620def HWASAN_CHECK_MEMACCESS : Pseudo< 1621 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1622 [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1623 Sched<[]>; 1624} 1625 1626let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { 1627def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< 1628 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1629 [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1630 Sched<[]>; 1631} 1632 1633// The virtual cycle counter register is CNTVCT_EL0. 
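// The raw MRS/MSR immediates used here and for FPCR below are the usual
// 16-bit op0:op1:CRn:CRm:op2 packing of a system-register name; as a worked
// example:
//
//   CNTVCT_EL0 (s3_3_c14_c0_2): 0b11'011'1110'0000'010 = 0xdf02
//   FPCR       (s3_3_c4_c4_0) : 0b11'011'0100'0100'000 = 0xda20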
1634def : Pat<(readcyclecounter), (MRS 0xdf02)>; 1635 1636// FPCR register 1637let Uses = [FPCR] in 1638def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), 1639 [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, 1640 PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, 1641 Sched<[WriteSys]>; 1642let Defs = [FPCR] in 1643def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), 1644 [(int_aarch64_set_fpcr i64:$val)]>, 1645 PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, 1646 Sched<[WriteSys]>; 1647 1648// Generic system instructions 1649def SYSxt : SystemXtI<0, "sys">; 1650def SYSLxt : SystemLXtI<1, "sysl">; 1651 1652def : InstAlias<"sys $op1, $Cn, $Cm, $op2", 1653 (SYSxt imm0_7:$op1, sys_cr_op:$Cn, 1654 sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 1655 1656 1657let Predicates = [HasTME] in { 1658 1659def TSTART : TMSystemI<0b0000, "tstart", 1660 [(set GPR64:$Rt, (int_aarch64_tstart))]>; 1661 1662def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; 1663 1664def TCANCEL : TMSystemException<0b011, "tcancel", 1665 [(int_aarch64_tcancel timm64_0_65535:$imm)]>; 1666 1667def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { 1668 let mayLoad = 0; 1669 let mayStore = 0; 1670} 1671} // HasTME 1672 1673//===----------------------------------------------------------------------===// 1674// Move immediate instructions. 1675//===----------------------------------------------------------------------===// 1676 1677defm MOVK : InsertImmediate<0b11, "movk">; 1678defm MOVN : MoveImmediate<0b00, "movn">; 1679 1680let PostEncoderMethod = "fixMOVZ" in 1681defm MOVZ : MoveImmediate<0b10, "movz">; 1682 1683// First group of aliases covers an implicit "lsl #0". 1684def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; 1685def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; 1686def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1687def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1688def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1689def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1690 1691// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
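// These aliases let assembly authors build a 64-bit absolute address 16 bits
// at a time; a typical sequence for some symbol "sym" (the _nc suffix selects
// the non-checking MOVK relocations):
//
//   movz x0, #:abs_g3:sym        // bits [63:48]
//   movk x0, #:abs_g2_nc:sym     // bits [47:32]
//   movk x0, #:abs_g1_nc:sym     // bits [31:16]
//   movk x0, #:abs_g0_nc:sym     // bits [15:0]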
1692def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1693def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1694def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1695def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1696 1697def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1698def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1699def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1700def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1701 1702def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; 1703def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; 1704def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; 1705def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; 1706 1707def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1708def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1709 1710def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1711def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1712 1713def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; 1714def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; 1715 1716// Final group of aliases covers true "mov $Rd, $imm" cases. 1717multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, 1718 int width, int shift> { 1719 def _asmoperand : AsmOperandClass { 1720 let Name = basename # width # "_lsl" # shift # "MovAlias"; 1721 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " 1722 # shift # ">"; 1723 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; 1724 } 1725 1726 def _movimm : Operand<i32> { 1727 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); 1728 } 1729 1730 def : InstAlias<"mov $Rd, $imm", 1731 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; 1732} 1733 1734defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; 1735defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; 1736 1737defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; 1738defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; 1739defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; 1740defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; 1741 1742defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; 1743defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; 1744 1745defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; 1746defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; 1747defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; 1748defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; 1749 1750let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, 1751 isAsCheapAsAMove = 1 in { 1752// FIXME: The following pseudo instructions are only needed because remat 1753// cannot handle multiple instructions. When that changes, we can select 1754// directly to the real instructions and get rid of these pseudos. 
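// As a sketch of the eventual expansion: AArch64ExpandPseudo rewrites, say,
// (MOVi64imm 0x123456789abc) into a movz/movk chain along the lines of
//
//   movz x0, #0x9abc
//   movk x0, #0x5678, lsl #16
//   movk x0, #0x1234, lsl #32
//
// Keeping the constant as a single pseudo until then is what allows the
// register allocator to rematerialize it instead of spilling.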
1755 1756def MOVi32imm 1757 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), 1758 [(set GPR32:$dst, imm:$src)]>, 1759 Sched<[WriteImm]>; 1760def MOVi64imm 1761 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), 1762 [(set GPR64:$dst, imm:$src)]>, 1763 Sched<[WriteImm]>; 1764} // isReMaterializable, isCodeGenOnly 1765 1766// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the 1767// eventual expansion code fewer bits to worry about getting right. Marshalling 1768// the types is a little tricky though: 1769def i64imm_32bit : ImmLeaf<i64, [{ 1770 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 1771}]>; 1772 1773def s64imm_32bit : ImmLeaf<i64, [{ 1774 int64_t Imm64 = static_cast<int64_t>(Imm); 1775 return Imm64 >= std::numeric_limits<int32_t>::min() && 1776 Imm64 <= std::numeric_limits<int32_t>::max(); 1777}]>; 1778 1779def trunc_imm : SDNodeXForm<imm, [{ 1780 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); 1781}]>; 1782 1783def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, 1784 GISDNodeXFormEquiv<trunc_imm>; 1785 1786let Predicates = [OptimizedGISelOrOtherSelector] in { 1787// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless 1788// copies. 1789def : Pat<(i64 i64imm_32bit:$src), 1790 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; 1791} 1792 1793// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 1794def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 1795return CurDAG->getTargetConstant( 1796 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 1797}]>; 1798 1799def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 1800return CurDAG->getTargetConstant( 1801 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 1802}]>; 1803 1804 1805def : Pat<(f32 fpimm:$in), 1806 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; 1807def : Pat<(f64 fpimm:$in), 1808 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; 1809 1810 1811// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 1812// sequences. 1813def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 1814 tglobaladdr:$g1, tglobaladdr:$g0), 1815 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), 1816 tglobaladdr:$g1, 16), 1817 tglobaladdr:$g2, 32), 1818 tglobaladdr:$g3, 48)>; 1819 1820def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 1821 tblockaddress:$g1, tblockaddress:$g0), 1822 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), 1823 tblockaddress:$g1, 16), 1824 tblockaddress:$g2, 32), 1825 tblockaddress:$g3, 48)>; 1826 1827def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 1828 tconstpool:$g1, tconstpool:$g0), 1829 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), 1830 tconstpool:$g1, 16), 1831 tconstpool:$g2, 32), 1832 tconstpool:$g3, 48)>; 1833 1834def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 1835 tjumptable:$g1, tjumptable:$g0), 1836 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), 1837 tjumptable:$g1, 16), 1838 tjumptable:$g2, 32), 1839 tjumptable:$g3, 48)>; 1840 1841 1842//===----------------------------------------------------------------------===// 1843// Arithmetic instructions. 1844//===----------------------------------------------------------------------===// 1845 1846// Add/subtract with carry. 
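// ADC/SBC consume the carry produced by a preceding flag-setting operation,
// which is how multi-word arithmetic chains its limbs; the NGC aliases below
// are SBC with a zero first operand. As a sketch, negating a 128-bit value
// held in x1:x0:
//
//   negs x0, x0      // low limb, sets carry for the borrow
//   ngc  x1, x1      // high limb: sbc x1, xzr, x1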
1847defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 1848defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 1849 1850def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 1851def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 1852def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 1853def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 1854 1855// Add/subtract 1856defm ADD : AddSub<0, "add", "sub", add>; 1857defm SUB : AddSub<1, "sub", "add">; 1858 1859def : InstAlias<"mov $dst, $src", 1860 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 1861def : InstAlias<"mov $dst, $src", 1862 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 1863def : InstAlias<"mov $dst, $src", 1864 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 1865def : InstAlias<"mov $dst, $src", 1866 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 1867 1868defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; 1869defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; 1870 1871def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ 1872 return N->getOpcode() == ISD::CopyFromReg && 1873 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; 1874}]>; 1875 1876// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 1877def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 1878 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 1879def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 1880 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 1881def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 1882 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 1883def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 1884 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 1885def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 1886 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 1887def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 1888 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 1889let AddedComplexity = 1 in { 1890def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), 1891 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; 1892def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), 1893 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; 1894def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), 1895 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; 1896} 1897 1898// Because of the immediate format for add/sub-imm instructions, the 1899// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1900// These patterns capture that transformation. 1901let AddedComplexity = 1 in { 1902def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1903 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1904def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1905 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1906def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1907 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1908def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1909 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1910} 1911 1912// Because of the immediate format for add/sub-imm instructions, the 1913// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1914// These patterns capture that transformation. 
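// The same rewrite is applied below to the flag-setting nodes, so a negative
// immediate still reaches a legal encoding; concretely, since #-5 is not a
// valid add/sub immediate:
//
//   add  w0, w1, #-5   ==>  sub  w0, w1, #5
//   adds w0, w1, #-5   ==>  subs w0, w1, #5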
1915let AddedComplexity = 1 in { 1916def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1917 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1918def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1919 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1920def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1921 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1922def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1923 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1924} 1925 1926def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1927def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1928def : InstAlias<"neg $dst, $src$shift", 1929 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1930def : InstAlias<"neg $dst, $src$shift", 1931 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1932 1933def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1934def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1935def : InstAlias<"negs $dst, $src$shift", 1936 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1937def : InstAlias<"negs $dst, $src$shift", 1938 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1939 1940 1941// Unsigned/Signed divide 1942defm UDIV : Div<0, "udiv", udiv>; 1943defm SDIV : Div<1, "sdiv", sdiv>; 1944 1945def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; 1946def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; 1947def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; 1948def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; 1949 1950// Variable shift 1951defm ASRV : Shift<0b10, "asr", sra>; 1952defm LSLV : Shift<0b00, "lsl", shl>; 1953defm LSRV : Shift<0b01, "lsr", srl>; 1954defm RORV : Shift<0b11, "ror", rotr>; 1955 1956def : ShiftAlias<"asrv", ASRVWr, GPR32>; 1957def : ShiftAlias<"asrv", ASRVXr, GPR64>; 1958def : ShiftAlias<"lslv", LSLVWr, GPR32>; 1959def : ShiftAlias<"lslv", LSLVXr, GPR64>; 1960def : ShiftAlias<"lsrv", LSRVWr, GPR32>; 1961def : ShiftAlias<"lsrv", LSRVXr, GPR64>; 1962def : ShiftAlias<"rorv", RORVWr, GPR32>; 1963def : ShiftAlias<"rorv", RORVXr, GPR64>; 1964 1965// Multiply-add 1966let AddedComplexity = 5 in { 1967defm MADD : MulAccum<0, "madd">; 1968defm MSUB : MulAccum<1, "msub">; 1969 1970def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), 1971 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1972def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), 1973 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1974 1975def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), 1976 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1977def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), 1978 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1979def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), 1980 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1981def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), 1982 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1983} // AddedComplexity = 5 1984 1985let AddedComplexity = 5 in { 1986def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; 1987def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; 1988def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; 1989def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; 1990 1991def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), 1992 (SMADDLrrr 
(EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1993def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), 1994 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1995def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), 1996 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1997def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), 1998 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1999def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), 2000 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2001def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), 2002 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2003 2004def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), 2005 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2006def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), 2007 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2008 2009def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), 2010 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2011def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), 2012 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2013def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), 2014 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2015 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2016 2017def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2018 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2019def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2020 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2021def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), 2022 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2023 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2024 2025def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), 2026 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2027def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), 2028 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2029def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), 2030 GPR64:$Ra)), 2031 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2032 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2033 2034def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2035 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2036def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2037 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2038def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), 2039 (s64imm_32bit:$C)))), 2040 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2041 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2042 2043def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)), 2044 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2045def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))), 2046 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2047 2048def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)), 2049 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2050def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)), 2051 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2052 2053def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2054 
(SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2055def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2056 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2057 2058def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2059 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2060def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2061 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2062 2063def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), 2064 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2065def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), 2066 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2067 2068def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), 2069 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2070def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), 2071 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2072 2073def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), 2074 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2075def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2076 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2077 2078def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), 2079 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2080def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2081 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2082} // AddedComplexity = 5 2083 2084def : MulAccumWAlias<"mul", MADDWrrr>; 2085def : MulAccumXAlias<"mul", MADDXrrr>; 2086def : MulAccumWAlias<"mneg", MSUBWrrr>; 2087def : MulAccumXAlias<"mneg", MSUBXrrr>; 2088def : WideMulAccumAlias<"smull", SMADDLrrr>; 2089def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 2090def : WideMulAccumAlias<"umull", UMADDLrrr>; 2091def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 2092 2093// Multiply-high 2094def SMULHrr : MulHi<0b010, "smulh", mulhs>; 2095def UMULHrr : MulHi<0b110, "umulh", mulhu>; 2096 2097// CRC32 2098def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 2099def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 2100def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 2101def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 2102 2103def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 2104def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 2105def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 2106def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; 2107 2108// v8.1 atomic CAS 2109defm CAS : CompareAndSwap<0, 0, "">; 2110defm CASA : CompareAndSwap<1, 0, "a">; 2111defm CASL : CompareAndSwap<0, 1, "l">; 2112defm CASAL : CompareAndSwap<1, 1, "al">; 2113 2114// v8.1 atomic CASP 2115defm CASP : CompareAndSwapPair<0, 0, "">; 2116defm CASPA : CompareAndSwapPair<1, 0, "a">; 2117defm CASPL : CompareAndSwapPair<0, 1, "l">; 2118defm CASPAL : CompareAndSwapPair<1, 1, "al">; 2119 2120// v8.1 atomic SWP 2121defm SWP : Swap<0, 0, "">; 2122defm SWPA : Swap<1, 0, "a">; 2123defm SWPL : Swap<0, 1, "l">; 2124defm SWPAL : Swap<1, 1, "al">; 2125 2126// v8.1 atomic LD<OP>(register). 
Performs load and then ST<OP>(register) 2127defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; 2128defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; 2129defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; 2130defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; 2131 2132defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; 2133defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; 2134defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; 2135defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; 2136 2137defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; 2138defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; 2139defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; 2140defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; 2141 2142defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; 2143defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; 2144defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; 2145defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; 2146 2147defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; 2148defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; 2149defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; 2150defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; 2151 2152defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; 2153defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; 2154defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; 2155defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; 2156 2157defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; 2158defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; 2159defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; 2160defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; 2161 2162defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; 2163defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; 2164defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; 2165defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; 2166 2167// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" 2168defm : STOPregister<"stadd","LDADD">; // STADDx 2169defm : STOPregister<"stclr","LDCLR">; // STCLRx 2170defm : STOPregister<"steor","LDEOR">; // STEORx 2171defm : STOPregister<"stset","LDSET">; // STSETx 2172defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx 2173defm : STOPregister<"stsmin","LDSMIN">;// STSMINx 2174defm : STOPregister<"stumax","LDUMAX">;// STUMAXx 2175defm : STOPregister<"stumin","LDUMIN">;// STUMINx 2176 2177// v8.5 Memory Tagging Extension 2178let Predicates = [HasMTE] in { 2179 2180def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", 2181 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; 2182 2183def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", 2184 int_aarch64_gmi, GPR64sp>, Sched<[]> { 2185 let isNotDuplicable = 1; 2186} 2187def ADDG : AddSubG<0, "addg", null_frag>; 2188def SUBG : AddSubG<1, "subg", null_frag>; 2189 2190def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; 2191 2192def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; 2193def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ 2194 let Defs = [NZCV]; 2195} 2196 2197def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; 2198 2199def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; 2200 2201def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), 2202 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; 2203def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2204 (LDG GPR64:$Rt, 
GPR64sp:$Rn, simm9s16:$offset)>; 2205 2206def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; 2207 2208def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", 2209 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; 2210def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", 2211 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; 2212def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", 2213 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { 2214 let Inst{23} = 0; 2215} 2216 2217defm STG : MemTagStore<0b00, "stg">; 2218defm STZG : MemTagStore<0b01, "stzg">; 2219defm ST2G : MemTagStore<0b10, "st2g">; 2220defm STZ2G : MemTagStore<0b11, "stz2g">; 2221 2222def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2223 (STGi $Rn, $Rm, $imm)>; 2224def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2225 (STZGi $Rn, $Rm, $imm)>; 2226def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2227 (ST2Gi $Rn, $Rm, $imm)>; 2228def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2229 (STZ2Gi $Rn, $Rm, $imm)>; 2230 2231defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; 2232def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; 2233def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; 2234 2235def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2236 (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2237 2238def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), 2239 (STGPi $Rt, $Rt2, $Rn, $imm)>; 2240 2241def IRGstack 2242 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, 2243 Sched<[]>; 2244def TAGPstack 2245 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, 2246 Sched<[]>; 2247 2248// Explicit SP in the first operand prevents ShrinkWrap optimization 2249// from leaving this instruction out of the stack frame. When IRGstack 2250// is transformed into IRG, this operand is replaced with the actual 2251// register / expression for the tagged base pointer of the current function. 2252def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; 2253 2254// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. 2255// $Rn_wback is one past the end of the range. $Rm is the loop counter. 2256let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in { 2257def STGloop_wback 2258 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2259 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2260 Sched<[WriteAdr, WriteST]>; 2261 2262def STZGloop_wback 2263 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2264 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2265 Sched<[WriteAdr, WriteST]>; 2266 2267// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. 2268// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). 
2269def STGloop 2270 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2271 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2272 Sched<[WriteAdr, WriteST]>; 2273 2274def STZGloop 2275 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2276 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2277 Sched<[WriteAdr, WriteST]>; 2278} 2279 2280} // Predicates = [HasMTE] 2281 2282//===----------------------------------------------------------------------===// 2283// Logical instructions. 2284//===----------------------------------------------------------------------===// 2285 2286// (immediate) 2287defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; 2288defm AND : LogicalImm<0b00, "and", and, "bic">; 2289defm EOR : LogicalImm<0b10, "eor", xor, "eon">; 2290defm ORR : LogicalImm<0b01, "orr", or, "orn">; 2291 2292// FIXME: these aliases *are* canonical sometimes (when movz can't be 2293// used). Actually, it seems to be working right now, but putting logical_immXX 2294// here is a bit dodgy on the AsmParser side too. 2295def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, 2296 logical_imm32:$imm), 0>; 2297def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, 2298 logical_imm64:$imm), 0>; 2299 2300 2301// (register) 2302defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; 2303defm BICS : LogicalRegS<0b11, 1, "bics", 2304 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; 2305defm AND : LogicalReg<0b00, 0, "and", and>; 2306defm BIC : LogicalReg<0b00, 1, "bic", 2307 BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>; 2308defm EON : LogicalReg<0b10, 1, "eon", 2309 BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; 2310defm EOR : LogicalReg<0b10, 0, "eor", xor>; 2311defm ORN : LogicalReg<0b01, 1, "orn", 2312 BinOpFrag<(or node:$LHS, (not node:$RHS))>>; 2313defm ORR : LogicalReg<0b01, 0, "orr", or>; 2314 2315def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; 2316def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; 2317 2318def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; 2319def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; 2320 2321def : InstAlias<"mvn $Wd, $Wm$sh", 2322 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; 2323def : InstAlias<"mvn $Xd, $Xm$sh", 2324 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; 2325 2326def : InstAlias<"tst $src1, $src2", 2327 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; 2328def : InstAlias<"tst $src1, $src2", 2329 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; 2330 2331def : InstAlias<"tst $src1, $src2", 2332 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; 2333def : InstAlias<"tst $src1, $src2", 2334 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; 2335 2336def : InstAlias<"tst $src1, $src2$sh", 2337 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; 2338def : InstAlias<"tst $src1, $src2$sh", 2339 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; 2340 2341 2342def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; 2343def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; 2344 2345 2346//===----------------------------------------------------------------------===// 2347// One operand data processing instructions. 
//===----------------------------------------------------------------------===//

defm CLS    : OneOperandData<0b000101, "cls">;
defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;

def  REV16Wr : OneWRegData<0b000001, "rev16",
                           UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                (i64 1))),
          (CLSXr GPR64:$Rn)>;
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;

// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b000010, "rev", bswap>;
def REVXr   : OneXRegData<0b000011, "rev", bswap>;
def REV32Xr : OneXRegData<0b000010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
          (REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
2411//===----------------------------------------------------------------------===// 2412let hasSideEffects = 0 in { 2413defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; 2414defm SBFM : BitfieldImm<0b00, "sbfm">; 2415defm UBFM : BitfieldImm<0b10, "ubfm">; 2416} 2417 2418def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2419 uint64_t enc = (32 - N->getZExtValue()) & 0x1f; 2420 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2421}]>; 2422 2423def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2424 uint64_t enc = 31 - N->getZExtValue(); 2425 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2426}]>; 2427 2428// min(7, 31 - shift_amt) 2429def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2430 uint64_t enc = 31 - N->getZExtValue(); 2431 enc = enc > 7 ? 7 : enc; 2432 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2433}]>; 2434 2435// min(15, 31 - shift_amt) 2436def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2437 uint64_t enc = 31 - N->getZExtValue(); 2438 enc = enc > 15 ? 15 : enc; 2439 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2440}]>; 2441 2442def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2443 uint64_t enc = (64 - N->getZExtValue()) & 0x3f; 2444 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2445}]>; 2446 2447def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2448 uint64_t enc = 63 - N->getZExtValue(); 2449 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2450}]>; 2451 2452// min(7, 63 - shift_amt) 2453def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2454 uint64_t enc = 63 - N->getZExtValue(); 2455 enc = enc > 7 ? 7 : enc; 2456 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2457}]>; 2458 2459// min(15, 63 - shift_amt) 2460def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2461 uint64_t enc = 63 - N->getZExtValue(); 2462 enc = enc > 15 ? 15 : enc; 2463 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2464}]>; 2465 2466// min(31, 63 - shift_amt) 2467def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ 2468 uint64_t enc = 63 - N->getZExtValue(); 2469 enc = enc > 31 ? 
31 : enc; 2470 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2471}]>; 2472 2473def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), 2474 (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 2475 (i64 (i32shift_b imm0_31:$imm)))>; 2476def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), 2477 (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 2478 (i64 (i64shift_b imm0_63:$imm)))>; 2479 2480let AddedComplexity = 10 in { 2481def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), 2482 (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2483def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), 2484 (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2485} 2486 2487def : InstAlias<"asr $dst, $src, $shift", 2488 (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2489def : InstAlias<"asr $dst, $src, $shift", 2490 (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2491def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2492def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2493def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2494def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2495def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2496 2497def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), 2498 (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2499def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), 2500 (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2501 2502def : InstAlias<"lsr $dst, $src, $shift", 2503 (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2504def : InstAlias<"lsr $dst, $src, $shift", 2505 (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2506def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2507def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2508def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2509def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2510def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2511 2512//===----------------------------------------------------------------------===// 2513// Conditional comparison instructions. 2514//===----------------------------------------------------------------------===// 2515defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; 2516defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; 2517 2518//===----------------------------------------------------------------------===// 2519// Conditional select instructions. 
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;

//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL  : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
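// Selects between small constants fold onto CSINC/CSINV with the zero
// register: "csinc Rd, zr, zr, cc" yields 0 when cc holds and 1 otherwise
// (the "cset" idiom, with the condition inverted), and "csinv Rd, zr, zr, cc"
// likewise yields 0 or -1 ("csetm"). Selecting 1 or -1 against a register
// value reuses that register as the other CSINC/CSINV operand.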
def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;

def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;

def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, XZR, imm:$cc)>;

def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSELWr WZR, GPR32:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;

// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel,
                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
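// ADRP materialises the 4KiB-aligned page address of a symbol, with a
// PC-relative range of +/-4GiB; the low 12 bits of the address are supplied
// separately, typically by a following "add Rd, Rd, :lo12:sym" or by folding
// them into a load/store offset. ADR covers the byte-accurate +/-1MiB case.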
//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call_bti GPR64noip:$Rn),
          (BLR_BTI GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
def BRK   : ExceptionGeneration<0b001, 0b00, "brk",
                                [(int_aarch64_break timm32_0_65535:$imm)]>;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;

def UDF : UDFType<0, "udf">;
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;

//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,  1, 0b01, FPR8Op,   "ldr", i8, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
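// The register-offset forms above take either a W index register (with UXTW
// or SXTW extension) or an X index register (with optional LSL or SXTX),
// optionally scaled by the access size, e.g. "ldr x0, [x1, w2, sxtw #3]" or
// "ldr q0, [x0, x1, lsl #4]".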
// Regular loads have no alignment requirement, so it is safe to map vector
// loads with the interesting addressing modes directly onto these
// instructions.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       f16, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       f16, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}
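// Loading a scalar directly into lane 0 of an undefined vector this way
// avoids a separate GPR->FPR transfer and lane insert: the FPR-class
// LDRB/LDRH/LDRS/LDRD instructions load straight into the low b/h/s/d
// subregister of the vector register, which is exactly lane 0.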
// Match all loads of 64-bit width whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // In big-endian mode vector loads must use LD1, so these patterns are
  // little-endian only.
  defm : VecROLoadPat<ro64, v2i32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,   LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;

// Match all loads of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // In big-endian mode vector loads must use LD1, so these patterns are
  // little-endian only.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}

// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}
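// The SUBREG_TO_REG in ExtLoadTo64ROPat exploits the fact that any
// instruction writing a W register implicitly zeroes bits [63:32] of the
// enclosing X register, so a zero-extending i64 load needs no extra
// instruction; e.g. "ldrb w0, [x1, x2]" already leaves x0 zero-extended.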
//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                   [(set (f128 FPR128Op:$Rt),
                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// Regular loads have no alignment requirement, so it is safe to map vector
// loads with the interesting addressing modes directly onto these
// instructions.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
             (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
             (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
             (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
             (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
             (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
             (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
             (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
             (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all loads of 64-bit width whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // In big-endian mode vector loads must use LD1, so these patterns are
  // little-endian only.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all loads of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // In big-endian mode vector loads must use LD1, so these patterns are
  // little-endian only.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
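// Anyext loads (extloadi8/16/1) are selected as zero-extending loads above:
// LDRB/LDRH always zero-extend into the 32-bit destination, so choosing the
// zext form costs nothing and leaves the upper bits in a known state.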
// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                    [(set GPR64:$Rt,
                          (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                     uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                          (am_indexed64 GPR64sp:$Rn,
                                                        uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
                        [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
                        [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
                        [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
                        [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
                        [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
                         [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                 [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
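// The LDUR* forms below take an unscaled, signed 9-bit byte offset
// (-256..255), in contrast to LDR's unsigned 12-bit offset scaled by the
// access size; e.g. "ldur x0, [x1, #-8]" has no single-instruction LDR
// equivalent.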
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                          [(set GPR64z:$Rt,
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                          [(set GPR32z:$Rt,
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                          [(set FPR8Op:$Rt,
                                (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                          [(set (f16 FPR16Op:$Rt),
                                (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                          [(set (f32 FPR32Op:$Rt),
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                          [(set (f64 FPR64Op:$Rt),
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                          [(set (f128 FPR128Op:$Rt),
                                (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
                   [(set GPR32:$Rt,
                         (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
                   [(set GPR32:$Rt,
                         (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// Match all loads of 64-bit width whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all loads of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;

//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand   : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand  : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand  : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand  : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                   [(set GPR32:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                   [(set GPR64:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                   [(set GPR64:$Rt,
                         (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// An LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
// load, 0) can use a single load.
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  // Scaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // Half-vector patterns
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // SVE patterns
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}
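// The nxv* (SVE) patterns above work because each scalable Z register
// aliases the corresponding NEON V register in its low 128 bits, so the same
// scalar-load-into-subregister trick also initialises element 0 of a
// scalable vector.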
defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
                              am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                              [(AArch64Prefetch timm:$Rt,
                                                (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
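// Pre-indexed loads write the updated address back to the base register
// before the access ("ldr x0, [x1, #8]!"), while the post-indexed forms
// further below access memory at the unmodified base and then update it
// ("ldr x0, [x1], #8").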
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;

//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;

// Floating-point
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op,  "str", i8, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all stores of 64-bit width whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // In big-endian mode vector stores must use ST1, so these patterns are
  // little-endian only.
  defm : VecROStorePat<ro64, v2i32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8,   FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all stores of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE, UseSTRQro] in {
  // In big-endian mode vector stores must use ST1, so these patterns are
  // little-endian only.
  defm : VecROStorePat<ro128, v2i64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
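// Storing lane 0 of a 128-bit vector needs no lane extract: lane 0 occupies
// the low bits of the Q register, which is exactly its H/S/D subregister, so
// an EXTRACT_SUBREG followed by an ordinary scalar store suffices.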
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16, store,         v8f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4f32, f32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2f64, f64, dsub, STRDroW, STRDroX>;
}

//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                     [(store GPR64z:$Rt,
                             (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                     [(store GPR32z:$Rt,
                             (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                      [(truncstorei16 GPR32z:$Rt,
                                      (am_indexed16 GPR64sp:$Rn,
                                                    uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
                      [(truncstorei8 GPR32z:$Rt,
                                     (am_indexed8 GPR64sp:$Rn,
                                                  uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all stores of 64-bit width whose type is compatible with FPR64.
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // In big-endian mode vector stores must use ST1, so these patterns are
  // little-endian only.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
          (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
          (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10
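
// Background sketch on the IsLE guards above (an explanatory note, checked
// against the architecture rather than taken from this file): on big-endian
// targets STR treats the whole D/Q register as a single big-endian integer,
// so the in-memory element order is reversed relative to the element-wise
// ST1. For a v4i32 in q0:
//
//   st1 { v0.4s }, [x0]   ; lane 0 always at the lowest address
//   str q0, [x0]          ; on BE, lane 3 would land at the lowest address
//
// hence plain STR is only used for vector stores in little-endian mode.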
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16, store,         v8f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4f32, f32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2f64, f64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                           [(store GPR64z:$Rt,
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                           [(store GPR32z:$Rt,
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                           [(store FPR8Op:$Rt,
                                   (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                           [(store (f16 FPR16Op:$Rt),
                                   (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                           [(store (f32 FPR32Op:$Rt),
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                           [(store (f64 FPR64Op:$Rt),
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                           [(store (f128 FPR128Op:$Rt),
                                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                            [(truncstorei16 GPR32z:$Rt,
                                            (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                            [(truncstorei8 GPR32z:$Rt,
                                           (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// Armv8.4 Weaker Release Consistency enhancements:
// LDAPR and STLR with an immediate offset.
let Predicates = [HasRCPC_IMMO] in {
defm STLURB    : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH    : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW    : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX    : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB   : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH   : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR    : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW  : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX   : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}
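
// Usage sketch for the RCPC-immo forms above (standard A64 syntax assumed):
// they combine acquire/release semantics with a signed 9-bit byte offset, so
// no separate address arithmetic is needed, e.g.
//
//   ldapur w0, [x1, #-4]   ; load-acquire from [x1 - 4]
//   stlur  w0, [x1, #8]    ; store-release to  [x1 + 8]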
// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;

//---
// (unscaled immediate, unprivileged)
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;

//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z,   "str", pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z,   "str", pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,   "str", pre_store, i8>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op,  "str", pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op,  "str", pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op,  "str", pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                   simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
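
// For illustration: the pre-indexed forms update the base register before
// the access, so a push-style store such as
//
//   str x0, [sp, #-16]!    ; sp := sp - 16, then store x0 at the new sp
//
// is selected through the pre_store/pre_truncsti* patterns, folding the
// pointer update into the store itself.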
def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z,   "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z,   "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8,  i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;
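
// For illustration: the post-indexed forms store first and then update the
// base register, which is the natural shape for a streaming loop, e.g.
//
//   str d0, [x0], #8       ; store d0 at x0, then x0 := x0 + 8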
def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//

def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
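
// Usage sketch (standard A64 syntax assumed): the exclusive and
// acquire/release instructions above implement atomic read-modify-write
// loops, e.g. an acquire/release atomic increment:
//
//   retry:
//     ldaxr x0, [x2]       ; load-acquire exclusive
//     add   x0, x0, #1
//     stlxr w1, x0, [x2]   ; store-release exclusive; w1 = 0 on success
//     cbnz  w1, retry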
/*
Aliases for when offset=0. Note that, in contrast to LoadAcquire, which has a
$Rn of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and
add an alias for the case of an immediate #0. This is because the new STLR
versions (from the LRCPC3 extension) do take a non-zero immediate value, so
GPR64sp0 is no longer appropriate (it parses and discards the optional zero).
This is not the case for LoadAcquire, because the new LRCPC3 LDAR instructions
are post-indexed: the immediate values are not inside the [] brackets and thus
are not accepted by the GPR64sp0 parser.
*/
def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",  (STLRW GPR32:$Rt, GPR64sp:$Rn)>;
def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",  (STLRX GPR64:$Rt, GPR64sp:$Rn)>;
def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32:$Rt, GPR64sp:$Rn)>;
def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32:$Rt, GPR64sp:$Rn)>;

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

let Predicates = [HasLOR] in {
  // v8.1a "Limited Order Region" extension load-acquire instructions
  def LDLARW  : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX  : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB  : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH  : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Order Region" extension store-release instructions
  def STLLRW  : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX  : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB  : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH  : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;

  // Aliases for when offset=0
  def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",  (STLLRW GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",  (STLLRX GPR64:$Rt, GPR64sp:$Rn)>;
  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32:$Rt, GPR64sp:$Rn)>;
}
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
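
// For illustration: because FCVTZS/FCVTZU already clamp out-of-range inputs
// (and convert NaN to zero), the saturating conversion nodes need no extra
// compare/select code, e.g.
//
//   (i32 (fp_to_sint_sat f32:$x, i32))   ==>   fcvtzs w0, s0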
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  }
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;

let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lround f16:$Rn)),
            (FCVTASUWHr f16:$Rn)>;
  def : Pat<(i64 (any_lround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
  def : Pat<(i64 (any_llround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
}
def : Pat<(i32 (any_lround f32:$Rn)),
          (FCVTASUWSr f32:$Rn)>;
def : Pat<(i32 (any_lround f64:$Rn)),
          (FCVTASUWDr f64:$Rn)>;
def : Pat<(i64 (any_lround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_lround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
def : Pat<(i64 (any_llround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_llround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
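
// For illustration: lround/llround round to nearest with ties away from
// zero, which is exactly the rounding FCVTAS performs, so a libm-style
// lround collapses to a single instruction:
//
//   long lround(float f);   ==>   fcvtas x0, s0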
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
             Sched<[WriteF]>;
}

// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
      Requires<[HasFullFP16]>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;

def : Pat<(bf16 fpimm0),
          (FMOVH0)>;

// Patterns for FP16 and BF16 immediates
let Predicates = [HasFullFP16] in {
  def : Pat<(f16 fpimm:$in),
            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;

  def : Pat<(bf16 fpimm:$in),
            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
}

//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FCVT : FPConversion<"fcvt">;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;

defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;

let SchedRW = [WriteFDiv] in {
defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
}

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264
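
// Sketch of the two-instruction lrint expansion defined just below
// (illustrative; register numbers are placeholders):
//
//   long lrint(float f);    ==>   frintx s0, s0
//                                 fcvtzs x0, s0
//
// FRINTX rounds to an integral value in the current rounding mode (raising
// inexact if needed); converting that result with FCVTZS is then exact.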
// Emitting strict_lrint as two instructions is valid, as any exception that
// occurs will be raised by exactly one of the two (e.g. if the input is not
// an integer, the inexact exception is raised by the FRINTX but not again by
// the FCVTZS, since the output of FRINTX is an integer).
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lrint f16:$Rn)),
            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_lrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_llrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
}
def : Pat<(i32 (any_lrint f32:$Rn)),
          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
def : Pat<(i32 (any_lrint f64:$Rn)),
          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_lrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_lrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_llrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_llrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;

//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;

multiclass FMULScalarFromIndexedLane0Patterns<string inst,
                                              string inst_f16_suffix,
                                              string inst_f32_suffix,
                                              string inst_f64_suffix,
                                              SDPatternOperator OpNode,
                                              list<Predicate> preds = []> {
  let Predicates = !listconcat(preds, [HasFullFP16]) in {
  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f16_suffix)
               FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>;
  }
  let Predicates = preds in {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f32_suffix)
               FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f64_suffix)
               FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
  }
}

defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
                                          any_fmul>;
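
// For illustration: the lane-0 patterns above let a scalar multiply consume
// lane 0 of a vector without an explicit element extract, e.g.
//
//   (f32 (any_fmul FPR32:$a, (vector_extract (v4f32 $v), (i64 0))))
//     ==>  fmul s0, s1, s2     ; s2 is the ssub sub-register of $v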
// Match reassociated forms of FNMUL.
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
          (FNMULHrr FPR16:$a, FPR16:$b)>,
      Requires<[HasFullFP16]>;
def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
          (FNMULSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
          (FNMULDrr FPR64:$a, FPR64:$b)>;

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
    TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
    TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
    TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.

// First handle fma(-a, b, c), i.e. c - a*b, which maps onto FMSUB:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now the fully negated form "(-c) + (-a)*b", i.e. -(a*b + c), which maps
// onto FNMADD:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;
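
// For illustration (register names are placeholders): the negated-FMA
// patterns earlier in this section fold the negations into the multiply-add
// selection instead of emitting separate FNEGs:
//
//   fma(-a, b, c)    ==>   fmsub  d0, d1, d2, d3   ; d3 - d1*d2
//   fma(-a, b, -c)   ==>   fnmadd d0, d1, d2, d3   ; -(d1*d2 + d3)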
//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

let Predicates = [HasFullFP16] in
def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                         (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}

//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//
let isPseudo = 1 in {
  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
}

// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
  def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
  let usesCustomInserter = 1 in
  def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
                 Sched<[]>;
}

// Pseudo instructions for homogeneous prolog/epilog
let isPseudo = 1 in {
  // Save CSRs in order, {FPOffset}
  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
  // Restore CSRs in order
  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
}
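
// Rough shape of the F128CSEL expansion defined earlier in this section (a
// sketch of what the custom inserter builds at the MI level; labels and
// register choices are placeholders):
//
//     b.<cond> bb.select_rn      ; NZCV was set by the preceding comparison
//     mov v0.16b, v2.16b         ; result := $Rm
//     b bb.done
//   bb.select_rn:
//     mov v0.16b, v1.16b         ; result := $Rn
//   bb.done: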
//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

let Predicates = [HasFullFP16] in {
  def : Pat<(bf16 fpimmbf16:$in),
            (FMOVHi (fpimm16XForm bf16:$in))>;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                        AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                           (zext (v8i8 V64:$opB))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
                                (zext (v8i8 V64:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
                                (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                           (zext (v4i16 V64:$opB))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
                           (zext (v2i32 V64:$opB))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;

defm ABS  : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
defm CLS  : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ  : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT  : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;

def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
          (CMLTv8i8rz V64:$Rn)>;
def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
          (CMLTv4i16rz V64:$Rn)>;
def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
          (CMLTv2i32rz V64:$Rn)>;
def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
          (CMLTv16i8rz V128:$Rn)>;
def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
          (CMLTv8i16rz V128:$Rn)>;
def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
          (CMLTv4i32rz V128:$Rn)>;
def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
          (CMLTv2i64rz V128:$Rn)>;
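
// Note on the AArch64vashr patterns above (illustrative): an arithmetic
// shift right by (element size - 1) broadcasts the sign bit, producing
// all-ones for negative lanes and zero otherwise, which is exactly CMLT #0:
//
//   (AArch64vashr v4i32:$v, (i32 31))   ==>   cmlt v0.4s, v1.4s, #0
//
// The xor/add/vashr forms matched for UABDL earlier are the same trick: they
// are the branchless (x + mask) ^ mask expansion of |a - b|.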
defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                                (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
          (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
          (FCVTLv4i32 V128:$Rn)>;
def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
          (FCVTLv8i16 V128:$Rn)>;

defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
          (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
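
// For illustration: the FCVTL/FCVTN patterns above let widening and
// narrowing conversions on the high half use the "2" forms directly, e.g.
//
//   (v4f32 (any_fpextend (v4f16 $v)))              ==>  fcvtl  v0.4s, v1.4h
//   (concat_vectors $lo, (any_fpround (v2f64 $v))) ==>  fcvtn2 v0.4s, v1.2d
//
// where fcvtn2 writes the narrowed elements into the top half of the
// destination and leaves $lo in the bottom half.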
// AArch64's FCVT instructions saturate when out of range.
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
  }
  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264

defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                                UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of
// the definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;

// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN
// with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
// with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127)) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                                    (v8i16 VImm7F)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                                    (v8i16 VImm80)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767)) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                                     (v4i32 VImm7FFF)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                                     (v4i32 VImm8000)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
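
// Source-level sketch of what the S/UQXTN patterns above catch (C-like
// pseudocode; names are placeholders):
//
//   // in: int16_t in[8]; out: int8_t out[8]
//   for (int i = 0; i < 8; ++i)
//     out[i] = clamp(in[i], -128, 127);
//
// The clamp lowers to smax/smin nodes and the truncation then selects a
// single  sqxtn v0.8b, v1.8h  (sqxtn2 for the concat_vectors forms), with
// UQXTN covering the unsigned umin-against-255/65535 variants.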
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
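
// The any_* fragments used above (any_fadd, any_fdiv, any_fmaxnum, ...) match
// both the default and the constrained (strict-FP) form of each operation, so
// one pattern covers both lowerings.
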
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
       TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
       TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;

// MLA and MLS are generated in MachineCombine
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
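
// SHADD/UHADD and SRHADD/URHADD are selected from the generic averaging nodes:
// avgfloor computes the truncating average (a + b) >> 1 and avgceil the
// rounding one (a + b + 1) >> 1, in both cases without losing the carry out of
// the intermediate add.
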
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturate patterns, other than the intrinsics matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

// cmls/cmlo/cmle/cmlt (and the fcmle/fcmlt, facle/faclt forms below) have no
// encodings of their own: the assembler accepts them by emitting the opposite
// comparison with the source operands swapped.
def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
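
// FMULX, FRECPS and FRSQRTS above take HasNEONorSME rather than plain HasNEON:
// these scalar FP instructions remain legal in SME streaming mode, where most
// other NEON instructions are not.
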
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}

defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
                                          int_aarch64_neon_fmulx,
                                          [HasNEONorSME]>;

def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                       int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
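
// The two patterns above fold a scalar saturating add/sub of an sqdmull
// result into SQDMLAL/SQDMLSL, performing the widening multiply and the
// saturating accumulate in a single instruction.
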
//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDFPTwoScalarCVT<0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;
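
// An arithmetic shift right by 63 of a v1i64 yields 0 or all-ones, which is
// exactly the result of CMLT #0, so it can be selected directly.
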
def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
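
// FRECPE/FRECPS and FRSQRTE/FRSQRTS form the estimate/step pairs used when
// fdiv and fsqrt are expanded via Newton-Raphson iteration; the AArch64frecp*
// and AArch64frsqrt* nodes above are produced by that expansion.
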
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bits integer to 32-bits floating point, not possible with
// UCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
} // let Predicates = [HasNEON]

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                          AArch64sabd>;
defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                              AArch64uabd>;
defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW   : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
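
// zanyext above matches both zext and anyext: anyext leaves the extended bits
// undefined, so implementing it with the zero-extending U* instructions is a
// legal refinement.
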
// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
                                       Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                    V64:$Ra,
                    (v4i16 (extract_subvector
                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v8i16 (INST8B
                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                    V64:$Ra,
                    (v2i32 (extract_subvector
                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v4i32 (INST4H
                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                    V64:$Ra,
                    (v1i64 (extract_subvector
                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v2i64 (INST2S
                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
}

defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;


multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;

  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
}

defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
defm : Neon_addl_extract_patterns<add, sext, "SADD">;
defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
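// For example, the v8i8 addhn below corresponds to IR of the form:
//   %sum  = add <8 x i16> %a, %b
//   %high = lshr <8 x i16> %sum, <i16 8, i16 8, ...>
//   %res  = trunc <8 x i16> %high to <8 x i8>
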
// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
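
// AdjustExtImm rebases an immediate from the 64-bit EXT onto the 128-bit EXT
// used in the last ExtPat pattern below: starting the extract in the high
// half of the source adds 8 to the byte offset.
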
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;

def : Pat<(v16i8 (concat_vectors
                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                  (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
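
// TBL writes zero for out-of-range indices, whereas TBX leaves the
// corresponding destination element unchanged; that is why the tbx patterns
// above carry the extra tied $Rd operand.
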
//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------

defm DUP : SIMDScalarDUP<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;

// Only the lower half of the result of the inner FADDP is used in the patterns
// below, so the second operand does not matter. Re-use the first input
// operand, so no additional dependencies need to be introduced.
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
            (FADDPv2i16p
              (EXTRACT_SUBREG
                 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
               dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, V128:$Rn),
             dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;
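
// For two-element vectors a single pairwise operation already performs the
// whole reduction, so the v2f32/v2f64 reductions above map directly onto the
// scalar-pairwise instructions.
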
//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
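
// As with the REV patterns earlier, the floating-point duplane patterns above
// reuse the integer DUP (element) instructions: DUP copies lanes verbatim, so
// the element type's interpretation does not matter.
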
// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                       imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                       imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                        VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                        VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
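
// SMOV sign-extends the selected lane into the W or X destination register,
// so the sext_inreg/sext wrappers above fold away entirely.
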
// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                       VectorIndexB:$idx)))),
                    (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                       VectorIndexH:$idx)))),
                    (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
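// For illustration: the GPR forms above become a single "fmov s0, w0".
// scalar_to_vector leaves every lane but lane 0 undefined, so SUBREG_TO_REG
// (or an INSERT_SUBREG into IMPLICIT_DEF for values already in an FPR) is
// all that is needed; no lane-by-lane construction takes place.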
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;
// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2))>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2))>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2))>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2))>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16,  v4f16,  f16,  INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32,  v2f32,  f32,  INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64,  v1f64,  f64,  INSvi64lane>;

defm : Neon_INS_elt_pattern<v16i8, v8i8,  i32, INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;

// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
          (EXTRACT_SUBREG
            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
            dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
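// For illustration (registers arbitrary): both the vcopy_lane intrinsic and
// the insert-of-extract idiom above select to a single lane-to-lane INS; for
// example vcopyq_laneq_s32(a, 1, b, 3) becomes "mov v0.s[1], v1.s[3]" (the
// MOV element alias of INS).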
// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
          (EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
          (EXTRACT_SUBREG V128:$src, dsub)>;

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if the lane number is
// anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
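// For illustration: extracting lane 0 of a v4f32 is free (s0 is simply the
// low subregister of q0, so no instruction is emitted), while extracting
// lane 2 becomes the scalar DUP "mov s0, v0.s[2]".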
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might as well
// be INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;

multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;

// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
            dsub))>;
def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;

// Patterns for across-vector intrinsics that have a node equivalent returning
// a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
                 (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
                 (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;
}
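// For illustration: for vmaxvq_s16 the node form above selects
// "smaxv h0, v0.8h"; the scalar result already sits in lane 0 of the vector
// register, so reading it back only costs an FPR->GPR move (or the SMOV/UMOV
// forms consumed by the signed/unsigned variants below) when the value is
// needed in a general purpose register.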
multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
  : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
                 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
                 (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
                 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
                 (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
  : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
                 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
                 maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
                 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
                 maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            ssub))>;
}

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special: it becomes ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm,
// and the result is read from Vd.s[0].
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special: it becomes ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm,
// and the result is read from Vd.s[0].
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              (i64 0)))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              (i64 0)))>;
  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
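// For illustration: vaddlv_u8 (int_aarch64_neon_uaddlv on v8i8) selects
// "uaddlv h0, v0.8b"; the widened sum lands in an H register, and the
// INSERT_SUBREG/EXTRACT_SUBREG pairing above re-reads it as the i32 in the
// covering S register without an extra move.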
// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".2d",
                     [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
                                                "fmov", ".2s",
                     [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".4s",
                     [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
                                                "fmov", ".4h",
                     [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                                "fmov", ".8h",
                     [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                                         [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
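// For illustration: simdimmtype10 is the "byte mask" immediate, in which each
// of the eight imm8 bits selects 0x00 or 0xff for one byte of the result, so
// e.g. "movi d0, #0xff00ff00ff00ff00" materializes an alternating byte mask
// in a single instruction.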
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                              simdimmtype10,
                                              "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

let Predicates = [HasNEON] in {
  // Using the MOVI to materialize fp constants.
  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
                                       (i32 24)),
                            ssub)>;
}

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                    [(set (v2i32 V64:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                    [(set (v4i32 V128:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
                                              "movi", ".8b",
                   [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                               "movi", ".16b",
                    [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
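// For illustration: MVNI materializes the bitwise NOT of the shifted imm8,
// so "mvni v0.4s, #0x55, lsl #8" writes ~0x00005500 = 0xffffaaff into every
// 32-bit lane.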
// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                    [(set (v2i32 V64:$Rd),
                          (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                    [(set (v4i32 V128:$Rd),
                          (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
  defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)>>;

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                     (v4f32 (insert_subvector undef,
                                               (v2f32 (fneg V64:$Rm)),
                                               (i64 0))),
                                     VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                     (v4f32 (insert_subvector undef,
                                               (v2f32 (fneg V64:$Rm)),
                                               (i64 0))),
                                     VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                             VectorIndexD:$idx))),
            (FMLSv2i64_indexed
               V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                   (v2f32 (fneg V64:$Rm)),
                                                   (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;

def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH  : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH  : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                      int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;
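// For illustration: the dup-operand FMUL patterns above mean a multiply by a
// scalar already held in an FPR selects the by-element encoding, e.g.
// "fmul v0.4s, v0.4s, v1.s[0]" with no explicit DUP; likewise
// vqdmulhq_laneq_s16(a, b, 3) becomes "sqdmulh v0.8h, v0.8h, v1.h[3]".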
// Generated by the MachineCombiner.
defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                            (vector_extract (v4i32 V128:$Vm),
                                                            VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
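// For illustration: these are the fixed-point converts, where the immediate
// is the number of fraction bits; e.g. vcvts_n_s32_f32(x, 8) selects
// "fcvtzs s0, s0, #8", i.e. scale by 2^8 and then round towards zero.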
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;

// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.

def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
                  (and FPR32:$Rn, (i32 65535)),
                  vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
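// For illustration: i16 is not a legal scalar type on AArch64, so the FP16
// fixed-point converts arrive with i32/i64-typed operands; the patterns
// above simply re-read the H register through the covering S/D register
// (EXTRACT_SUBREG / INSERT_SUBREG with hsub) instead of emitting any move.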
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGE16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGT16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;

defm SHL      : SIMDScalarLShiftD<0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS<0, 0b10011, "sqrshrn",
                                    int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS<1, 0b10001, "sqrshrun",
                                    int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS<0, 0b10010, "sqshrn",
                                    int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS<1, 0b10000, "sqshrun",
                                    int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<0, 0b00010, "ssra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS<1, 0b10011, "uqrshrn",
                                    int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS<1, 0b10010, "uqshrn",
                                    int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<1, 0b00010, "usra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vlshr node:$MHS, node:$RHS))>>;

//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU : SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF  : SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                   int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
    BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
defm SHL    : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;

// X << 1 ==> X + X
class SHLToADDPat<ValueType ty, RegisterClass regtype>
  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
        (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;

def : SHLToADDPat<v16i8, FPR128>;
def : SHLToADDPat<v8i16, FPR128>;
def : SHLToADDPat<v4i32, FPR128>;
def : SHLToADDPat<v2i64, FPR128>;
def : SHLToADDPat<v8i8, FPR64>;
def : SHLToADDPat<v4i16, FPR64>;
def : SHLToADDPat<v2i32, FPR64>;

defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
    BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI  : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                          int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                          int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                          int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                          int_aarch64_neon_sqshrun>;
defm SRI      : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR    : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHLL    : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
    BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR     : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
    TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF    : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                                     int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                          int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                          int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHLL    : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
    BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR     : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
    TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))>>;

// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
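// For illustration: RADDHN computes (Vn + Vm + (1 << (esize/2 - 1))) and
// keeps the high half of each lane, so with Vm == 0 it is exactly the
// "(x + 0x80) >> 8, then truncate" idiom matched above; the zero operand
// comes from a rematerializable MOVIv2d_ns rather than a GPR transfer.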
// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
                   (v8i8 V64:$Vd),
                   (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
          (RADDHNv8i16_v16i8
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
            (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                   (v4i16 V64:$Vd),
                   (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
          (RADDHNv4i32_v8i16
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
            (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
                   (v2i32 V64:$Vd),
                   (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
          (RADDHNv2i64_v4i32
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
            (v2i64 (MOVIv2d_ns (i32 0))))>;

// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result, so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
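// For illustration: (v8i16 (zext (v8i8 x))) is a shift-left-long by zero,
// "ushll v0.8h, v0.8b, #0", which the aliases below let the assembler print
// as "uxtl v0.8h, v0.8b".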
// Also match an extend from the upper half of a 128 bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;

// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
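// For illustration: "uxtl2 v0.4s, v1.8h" assembles to (and disassembles
// from) "ushll2 v0.4s, v1.8h, #0", matching the upper-half extend patterns
// above.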
integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bits -> float. 2 sizes step-up.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (f64
                                (EXTRACT_SUBREG
                                  (SSHLLv8i8_shift
                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                   INST,
                                                   bsub),
                                    0),
                                  dsub)),
                              0),
                            ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                             INST,
                                             hsub),
                              0),
                            ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bits to 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with SCVTF on
// floating point registers (both source and destination must have the
// same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size step-up.
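// As an illustration (cycle counts above are approximate and vary by core),
// the SExtLoadi16CVTf64Pat patterns below are expected to turn
//   (f64 (sint_to_fp (i32 (sextloadi16 addr))))
// into a sequence along the lines of (register numbers illustrative):
//   ldr   h0, [x0]
//   sshll v0.4s, v0.4h, #0
//   sshll v0.2d, v0.2s, #0
//   scvtf d0, d0
// keeping the whole computation on the FP/SIMD unit instead of paying for the
// slower GPR -> FPR SCVTF.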
7394class SExtLoadi16CVTf64Pat<dag addrmode, dag INST> 7395 : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), 7396 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7397 (SSHLLv2i32_shift 7398 (f64 7399 (EXTRACT_SUBREG 7400 (SSHLLv4i16_shift 7401 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7402 INST, 7403 hsub), 7404 0), 7405 dsub)), 7406 0), 7407 dsub)))>, 7408 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7409 7410def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), 7411 (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; 7412def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), 7413 (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; 7414def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), 7415 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 7416def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), 7417 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 7418// 32-bits -> double. 1 size step-up. 7419class SExtLoadi32CVTf64Pat<dag addrmode, dag INST> 7420 : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), 7421 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7422 (SSHLLv2i32_shift 7423 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7424 INST, 7425 ssub), 7426 0), 7427 dsub)))>, 7428 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7429 7430def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), 7431 (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; 7432def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), 7433 (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; 7434def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), 7435 (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; 7436def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), 7437 (LDURSi GPR64sp:$Rn, simm9:$offset)>; 7438 7439// 64-bits -> double are handled in target specific dag combine: 7440// performIntToFpCombine. 
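// For example, with the address in x0, the SExtLoadi32CVTf64Pat patterns
// above should produce roughly (register numbers illustrative):
//   ldr   s0, [x0]
//   sshll v0.2d, v0.2s, #0
//   scvtf d0, d0
// i.e. a single SXTL step followed by the FPR -> FPR convert.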
7441 7442 7443//---------------------------------------------------------------------------- 7444// AdvSIMD Load-Store Structure 7445//---------------------------------------------------------------------------- 7446defm LD1 : SIMDLd1Multiple<"ld1">; 7447defm LD2 : SIMDLd2Multiple<"ld2">; 7448defm LD3 : SIMDLd3Multiple<"ld3">; 7449defm LD4 : SIMDLd4Multiple<"ld4">; 7450 7451defm ST1 : SIMDSt1Multiple<"st1">; 7452defm ST2 : SIMDSt2Multiple<"st2">; 7453defm ST3 : SIMDSt3Multiple<"st3">; 7454defm ST4 : SIMDSt4Multiple<"st4">; 7455 7456class Ld1Pat<ValueType ty, Instruction INST> 7457 : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; 7458 7459def : Ld1Pat<v16i8, LD1Onev16b>; 7460def : Ld1Pat<v8i16, LD1Onev8h>; 7461def : Ld1Pat<v4i32, LD1Onev4s>; 7462def : Ld1Pat<v2i64, LD1Onev2d>; 7463def : Ld1Pat<v8i8, LD1Onev8b>; 7464def : Ld1Pat<v4i16, LD1Onev4h>; 7465def : Ld1Pat<v2i32, LD1Onev2s>; 7466def : Ld1Pat<v1i64, LD1Onev1d>; 7467 7468class St1Pat<ValueType ty, Instruction INST> 7469 : Pat<(store ty:$Vt, GPR64sp:$Rn), 7470 (INST ty:$Vt, GPR64sp:$Rn)>; 7471 7472def : St1Pat<v16i8, ST1Onev16b>; 7473def : St1Pat<v8i16, ST1Onev8h>; 7474def : St1Pat<v4i32, ST1Onev4s>; 7475def : St1Pat<v2i64, ST1Onev2d>; 7476def : St1Pat<v8i8, ST1Onev8b>; 7477def : St1Pat<v4i16, ST1Onev4h>; 7478def : St1Pat<v2i32, ST1Onev2s>; 7479def : St1Pat<v1i64, ST1Onev1d>; 7480 7481//--- 7482// Single-element 7483//--- 7484 7485defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; 7486defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; 7487defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; 7488defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; 7489let mayLoad = 1, hasSideEffects = 0 in { 7490defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; 7491defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; 7492defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; 7493defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; 7494defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; 7495defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; 7496defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; 7497defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; 7498defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; 7499defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; 7500defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; 7501defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; 7502defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; 7503defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; 7504defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; 7505defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; 7506} 7507 7508def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7509 (LD1Rv8b GPR64sp:$Rn)>; 7510def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7511 (LD1Rv16b GPR64sp:$Rn)>; 7512def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7513 (LD1Rv4h GPR64sp:$Rn)>; 7514def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7515 (LD1Rv8h GPR64sp:$Rn)>; 7516def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7517 (LD1Rv2s GPR64sp:$Rn)>; 7518def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7519 (LD1Rv4s GPR64sp:$Rn)>; 
7520def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7521 (LD1Rv2d GPR64sp:$Rn)>; 7522def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7523 (LD1Rv1d GPR64sp:$Rn)>; 7524 7525def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7526 (LD1Rv8b GPR64sp:$Rn)>; 7527def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))), 7528 (LD1Rv16b GPR64sp:$Rn)>; 7529def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7530 (LD1Rv4h GPR64sp:$Rn)>; 7531def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))), 7532 (LD1Rv8h GPR64sp:$Rn)>; 7533def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7534 (LD1Rv2s GPR64sp:$Rn)>; 7535def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))), 7536 (LD1Rv4s GPR64sp:$Rn)>; 7537def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))), 7538 (LD1Rv2d GPR64sp:$Rn)>; 7539 7540// Grab the floating point version too 7541def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7542 (LD1Rv2s GPR64sp:$Rn)>; 7543def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7544 (LD1Rv4s GPR64sp:$Rn)>; 7545def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7546 (LD1Rv2d GPR64sp:$Rn)>; 7547def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7548 (LD1Rv1d GPR64sp:$Rn)>; 7549def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7550 (LD1Rv4h GPR64sp:$Rn)>; 7551def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7552 (LD1Rv8h GPR64sp:$Rn)>; 7553def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7554 (LD1Rv4h GPR64sp:$Rn)>; 7555def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7556 (LD1Rv8h GPR64sp:$Rn)>; 7557 7558class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, 7559 ValueType VTy, ValueType STy, Instruction LD1> 7560 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7561 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7562 (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; 7563 7564def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; 7565def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; 7566def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; 7567def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; 7568def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; 7569def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; 7570def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>; 7571def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>; 7572 7573// Generate LD1 for extload if memory type does not match the 7574// destination type, for example: 7575// 7576// (v4i32 (insert_vector_elt (load anyext from i8) idx)) 7577// 7578// In this case, the index must be adjusted to match LD1 type. 
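// For instance, inserting an extloadi8 into lane 1 of a v4i32 needs LD1i8 on
// byte lane 4 (1 * 4), and inserting an extloadi16 into lane 1 of a v4i32
// needs LD1i16 on halfword lane 2 (1 * 2); the VectorIndexStoB and
// VectorIndexStoH transforms below compute exactly this scaling.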
7579// 7580class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand 7581 VecIndex, ValueType VTy, ValueType STy, 7582 Instruction LD1, SDNodeXForm IdxOp> 7583 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7584 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7585 (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; 7586 7587class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex, 7588 ValueType VTy, ValueType STy, Instruction LD1, 7589 SDNodeXForm IdxOp> 7590 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7591 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7592 (EXTRACT_SUBREG 7593 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7594 (IdxOp VecIndex:$idx), GPR64sp:$Rn), 7595 dsub)>; 7596 7597def VectorIndexStoH : SDNodeXForm<imm, [{ 7598 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7599}]>; 7600def VectorIndexStoB : SDNodeXForm<imm, [{ 7601 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); 7602}]>; 7603def VectorIndexHtoB : SDNodeXForm<imm, [{ 7604 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7605}]>; 7606 7607def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>; 7608def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>; 7609def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>; 7610 7611def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>; 7612def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>; 7613def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>; 7614 7615// Same as above, but the first element is populated using 7616// scalar_to_vector + insert_subvector instead of insert_vector_elt. 
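// For example, (v2i32 (scalar_to_vector (i32 (extloadi16 addr)))) should
// become a single "ld1 { v0.h }[0], [x0]" (registers illustrative), loading
// straight into lane 0 instead of bouncing through a GPR.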
7617let Predicates = [IsNeonAvailable] in { 7618 class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy, 7619 SDPatternOperator ExtLoad, Instruction LD1> 7620 : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), 7621 (ResultTy (EXTRACT_SUBREG 7622 (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; 7623 7624 def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>; 7625 def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>; 7626 def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>; 7627} 7628class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, 7629 ValueType VTy, ValueType STy, Instruction LD1> 7630 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7631 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7632 (EXTRACT_SUBREG 7633 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7634 VecIndex:$idx, GPR64sp:$Rn), 7635 dsub)>; 7636 7637def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; 7638def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; 7639def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; 7640def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; 7641def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>; 7642def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>; 7643 7644 7645defm LD1 : SIMDLdSt1SingleAliases<"ld1">; 7646defm LD2 : SIMDLdSt2SingleAliases<"ld2">; 7647defm LD3 : SIMDLdSt3SingleAliases<"ld3">; 7648defm LD4 : SIMDLdSt4SingleAliases<"ld4">; 7649 7650// Stores 7651defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; 7652defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; 7653defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; 7654defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; 7655 7656let AddedComplexity = 19 in 7657class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, 7658 ValueType VTy, ValueType STy, Instruction ST1> 7659 : Pat<(scalar_store 7660 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7661 GPR64sp:$Rn), 7662 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; 7663 7664def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>; 7665def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; 7666def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; 7667def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; 7668def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; 7669def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; 7670def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>; 7671def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>; 7672 7673let AddedComplexity = 19 in 7674class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7675 ValueType VTy, ValueType STy, Instruction ST1> 7676 : Pat<(scalar_store 7677 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7678 GPR64sp:$Rn), 7679 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7680 VecIndex:$idx, GPR64sp:$Rn)>; 7681 7682def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; 7683def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; 7684def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; 7685def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; 7686def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>; 7687def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>; 7688 7689multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7690 ValueType VTy, 
ValueType STy, Instruction ST1, 7691 int offset> { 7692 def : Pat<(scalar_store 7693 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7694 GPR64sp:$Rn, offset), 7695 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7696 VecIndex:$idx, GPR64sp:$Rn, XZR)>; 7697 7698 def : Pat<(scalar_store 7699 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7700 GPR64sp:$Rn, GPR64:$Rm), 7701 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7702 VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 7703} 7704 7705defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; 7706defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, 7707 2>; 7708defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; 7709defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; 7710defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; 7711defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; 7712defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>; 7713defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>; 7714 7715multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, 7716 ValueType VTy, ValueType STy, Instruction ST1, 7717 int offset> { 7718 def : Pat<(scalar_store 7719 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7720 GPR64sp:$Rn, offset), 7721 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; 7722 7723 def : Pat<(scalar_store 7724 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7725 GPR64sp:$Rn, GPR64:$Rm), 7726 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 7727} 7728 7729defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, 7730 1>; 7731defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, 7732 2>; 7733defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; 7734defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; 7735defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; 7736defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; 7737defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>; 7738defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>; 7739 7740let mayStore = 1, hasSideEffects = 0 in { 7741defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; 7742defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; 7743defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; 7744defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; 7745defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; 7746defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; 7747defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; 7748defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; 7749defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; 7750defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; 7751defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; 7752defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; 7753} 7754 7755defm ST1 : SIMDLdSt1SingleAliases<"st1">; 7756defm ST2 : SIMDLdSt2SingleAliases<"st2">; 7757defm ST3 : SIMDLdSt3SingleAliases<"st3">; 7758defm ST4 : 
SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

let Predicates = [HasAES] in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                        Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                         Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired
// with AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2", int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
}

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
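// SBFM Xd, Xn, #0, #31 is the canonical encoding of sxtw, so once the 32-bit
// value has been placed in the low half of an X register via INSERT_SUBREG,
// (i64 (sext GPR32:$src)) below collapses to a single sxtw instruction.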
7827def : Pat<(i64 (sext GPR32:$src)), 7828 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 7829def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; 7830def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; 7831def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; 7832def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; 7833def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; 7834def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; 7835def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; 7836 7837def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), 7838 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 7839 (i64 (i32shift_sext_i8 imm0_31:$imm)))>; 7840def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), 7841 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 7842 (i64 (i64shift_sext_i8 imm0_63:$imm)))>; 7843 7844def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), 7845 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 7846 (i64 (i32shift_sext_i16 imm0_31:$imm)))>; 7847def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), 7848 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 7849 (i64 (i64shift_sext_i16 imm0_63:$imm)))>; 7850 7851def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), 7852 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7853 (i64 (i64shift_a imm0_63:$imm)), 7854 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 7855 7856def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), 7857 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7858 (i64 (i64shift_a imm0_63:$imm)), 7859 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 7860 7861// sra patterns have an AddedComplexity of 10, so make sure we have a higher 7862// AddedComplexity for the following patterns since we want to match sext + sra 7863// patterns before we attempt to match a single sra node. 7864let AddedComplexity = 20 in { 7865// We support all sext + sra combinations which preserve at least one bit of the 7866// original value which is to be sign extended. E.g. we support shifts up to 7867// bitwidth-1 bits. 7868def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), 7869 (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; 7870def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), 7871 (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; 7872 7873def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), 7874 (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; 7875def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), 7876 (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; 7877 7878def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), 7879 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7880 (i64 imm0_31:$imm), 31)>; 7881} // AddedComplexity = 20 7882 7883// To truncate, we can simply extract from a subregister. 7884def : Pat<(i32 (trunc GPR64sp:$src)), 7885 (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; 7886 7887// __builtin_trap() uses the BRK instruction on AArch64. 
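// __builtin_debugtrap() likewise lowers to BRK, with immediate 0xF000, and
// UBSan traps encode the check kind in the immediate as ('U' << 8) | kind,
// i.e. 0x5500 | kind, which is what ubsan_trap_xform below computes.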
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;

// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high halves of
// both results together.
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//        store v4i16 v3
//
// But this is now broken - the value stored is different from the value
// loaded due to lane reordering. To fix this, on every BITCAST we must
// perform two other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
//   v2 = REV v2i32 v1
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16 v3
//   v5 = REV v4i16 v4               (implicit)
//        store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two
// REV instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction.
This must be synthesized with an 7975// EXT instruction. 7976// 7977// Most bitconverts require some sort of conversion. The only exceptions are: 7978// a) Identity conversions - vNfX <-> vNiX 7979// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX 7980// 7981 7982// Natural vector casts (64 bit) 7983foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 7984 foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 7985 def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))), 7986 (VT FPR64:$src)>; 7987 7988// Natural vector casts (128 bit) 7989foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7990 foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7991 def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))), 7992 (VT FPR128:$src)>; 7993 7994let Predicates = [IsLE] in { 7995def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7996def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7997def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7998def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7999def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8000def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8001 8002def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 8003 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8004def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 8005 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8006def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 8007 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8008def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 8009 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8010def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 8011 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8012def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 8013 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8014def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 8015 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8016} 8017let Predicates = [IsBE] in { 8018def : Pat<(v8i8 (bitconvert GPR64:$Xn)), 8019 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8020def : Pat<(v4i16 (bitconvert GPR64:$Xn)), 8021 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8022def : Pat<(v2i32 (bitconvert GPR64:$Xn)), 8023 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8024def : Pat<(v4f16 (bitconvert GPR64:$Xn)), 8025 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8026def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), 8027 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8028def : Pat<(v2f32 (bitconvert GPR64:$Xn)), 8029 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8030 8031def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 8032 (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8033def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 8034 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8035def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 8036 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8037def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 8038 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8039def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 8040 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8041def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 8042 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8043} 8044def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8045def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8046def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), 8047 (COPY_TO_REGCLASS V64:$Vn, 
GPR64)>; 8048def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), 8049 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8050def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), 8051 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8052def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; 8053 8054def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), 8055 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; 8056def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), 8057 (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; 8058def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), 8059 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8060def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), 8061 (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; 8062def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 8063 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8064 8065def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>; 8066def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>; 8067 8068let Predicates = [IsLE] in { 8069def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; 8070def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; 8071def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; 8072def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; 8073def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>; 8074def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; 8075} 8076let Predicates = [IsBE] in { 8077def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), 8078 (v1i64 (REV64v2i32 FPR64:$src))>; 8079def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), 8080 (v1i64 (REV64v4i16 FPR64:$src))>; 8081def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), 8082 (v1i64 (REV64v8i8 FPR64:$src))>; 8083def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), 8084 (v1i64 (REV64v4i16 FPR64:$src))>; 8085def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), 8086 (v1i64 (REV64v4i16 FPR64:$src))>; 8087def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), 8088 (v1i64 (REV64v2i32 FPR64:$src))>; 8089} 8090def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; 8091def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; 8092 8093let Predicates = [IsLE] in { 8094def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; 8095def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; 8096def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; 8097def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; 8098def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; 8099def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; 8100def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>; 8101} 8102let Predicates = [IsBE] in { 8103def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), 8104 (v2i32 (REV64v2i32 FPR64:$src))>; 8105def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), 8106 (v2i32 (REV32v4i16 FPR64:$src))>; 8107def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), 8108 (v2i32 (REV32v8i8 FPR64:$src))>; 8109def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), 8110 (v2i32 (REV64v2i32 FPR64:$src))>; 8111def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), 8112 (v2i32 (REV64v2i32 FPR64:$src))>; 8113def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), 8114 (v2i32 (REV32v4i16 FPR64:$src))>; 8115def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), 8116 (v2i32 (REV32v4i16 FPR64:$src))>; 8117} 8118def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; 8119 8120let Predicates = [IsLE] in { 8121def : Pat<(v4i16 
(bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; 8122def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; 8123def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; 8124def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; 8125def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; 8126def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; 8127} 8128let Predicates = [IsBE] in { 8129def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), 8130 (v4i16 (REV64v4i16 FPR64:$src))>; 8131def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), 8132 (v4i16 (REV32v4i16 FPR64:$src))>; 8133def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), 8134 (v4i16 (REV16v8i8 FPR64:$src))>; 8135def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), 8136 (v4i16 (REV64v4i16 FPR64:$src))>; 8137def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), 8138 (v4i16 (REV32v4i16 FPR64:$src))>; 8139def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), 8140 (v4i16 (REV64v4i16 FPR64:$src))>; 8141} 8142def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; 8143def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>; 8144 8145let Predicates = [IsLE] in { 8146def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; 8147def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; 8148def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; 8149def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; 8150def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; 8151def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; 8152 8153def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>; 8154def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; 8155def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; 8156def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; 8157def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>; 8158def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>; 8159} 8160let Predicates = [IsBE] in { 8161def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), 8162 (v4f16 (REV64v4i16 FPR64:$src))>; 8163def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), 8164 (v4f16 (REV32v4i16 FPR64:$src))>; 8165def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), 8166 (v4f16 (REV16v8i8 FPR64:$src))>; 8167def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), 8168 (v4f16 (REV64v4i16 FPR64:$src))>; 8169def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), 8170 (v4f16 (REV32v4i16 FPR64:$src))>; 8171def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), 8172 (v4f16 (REV64v4i16 FPR64:$src))>; 8173 8174def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), 8175 (v4bf16 (REV64v4i16 FPR64:$src))>; 8176def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), 8177 (v4bf16 (REV32v4i16 FPR64:$src))>; 8178def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), 8179 (v4bf16 (REV16v8i8 FPR64:$src))>; 8180def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), 8181 (v4bf16 (REV64v4i16 FPR64:$src))>; 8182def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), 8183 (v4bf16 (REV32v4i16 FPR64:$src))>; 8184def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), 8185 (v4bf16 (REV64v4i16 FPR64:$src))>; 8186} 8187def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; 8188def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; 8189 8190let Predicates = [IsLE] in { 8191def : Pat<(v8i8 
(bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; 8192def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; 8193def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; 8194def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; 8195def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; 8196def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; 8197def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; 8198def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>; 8199} 8200let Predicates = [IsBE] in { 8201def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), 8202 (v8i8 (REV64v8i8 FPR64:$src))>; 8203def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), 8204 (v8i8 (REV32v8i8 FPR64:$src))>; 8205def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), 8206 (v8i8 (REV16v8i8 FPR64:$src))>; 8207def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), 8208 (v8i8 (REV64v8i8 FPR64:$src))>; 8209def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), 8210 (v8i8 (REV32v8i8 FPR64:$src))>; 8211def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), 8212 (v8i8 (REV64v8i8 FPR64:$src))>; 8213def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), 8214 (v8i8 (REV16v8i8 FPR64:$src))>; 8215def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), 8216 (v8i8 (REV16v8i8 FPR64:$src))>; 8217} 8218 8219let Predicates = [IsLE] in { 8220def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; 8221def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; 8222def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; 8223def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; 8224def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; 8225def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>; 8226} 8227let Predicates = [IsBE] in { 8228def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), 8229 (f64 (REV64v2i32 FPR64:$src))>; 8230def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), 8231 (f64 (REV64v4i16 FPR64:$src))>; 8232def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), 8233 (f64 (REV64v2i32 FPR64:$src))>; 8234def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), 8235 (f64 (REV64v8i8 FPR64:$src))>; 8236def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), 8237 (f64 (REV64v4i16 FPR64:$src))>; 8238def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), 8239 (f64 (REV64v4i16 FPR64:$src))>; 8240} 8241def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; 8242def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; 8243 8244let Predicates = [IsLE] in { 8245def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; 8246def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; 8247def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; 8248def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; 8249def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; 8250def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>; 8251} 8252let Predicates = [IsBE] in { 8253def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), 8254 (v1f64 (REV64v2i32 FPR64:$src))>; 8255def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), 8256 (v1f64 (REV64v4i16 FPR64:$src))>; 8257def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), 8258 (v1f64 (REV64v8i8 FPR64:$src))>; 8259def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), 8260 (v1f64 (REV64v2i32 FPR64:$src))>; 8261def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), 8262 (v1f64 (REV64v4i16 FPR64:$src))>; 8263def : 
Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), 8264 (v1f64 (REV64v4i16 FPR64:$src))>; 8265} 8266def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; 8267def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; 8268 8269let Predicates = [IsLE] in { 8270def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; 8271def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; 8272def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; 8273def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; 8274def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; 8275def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; 8276def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>; 8277} 8278let Predicates = [IsBE] in { 8279def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), 8280 (v2f32 (REV64v2i32 FPR64:$src))>; 8281def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), 8282 (v2f32 (REV32v4i16 FPR64:$src))>; 8283def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), 8284 (v2f32 (REV32v8i8 FPR64:$src))>; 8285def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), 8286 (v2f32 (REV64v2i32 FPR64:$src))>; 8287def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), 8288 (v2f32 (REV64v2i32 FPR64:$src))>; 8289def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), 8290 (v2f32 (REV32v4i16 FPR64:$src))>; 8291def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), 8292 (v2f32 (REV32v4i16 FPR64:$src))>; 8293} 8294def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; 8295 8296let Predicates = [IsLE] in { 8297def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; 8298def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; 8299def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; 8300def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; 8301def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; 8302def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; 8303def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>; 8304def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; 8305} 8306let Predicates = [IsBE] in { 8307def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), 8308 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 8309def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), 8310 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 8311 (REV64v4i32 FPR128:$src), (i32 8)))>; 8312def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), 8313 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8314 (REV64v8i16 FPR128:$src), (i32 8)))>; 8315def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), 8316 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8317 (REV64v8i16 FPR128:$src), (i32 8)))>; 8318def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), 8319 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8320 (REV64v8i16 FPR128:$src), (i32 8)))>; 8321def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), 8322 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 8323def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), 8324 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 8325 (REV64v4i32 FPR128:$src), (i32 8)))>; 8326def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), 8327 (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), 8328 (REV64v16i8 FPR128:$src), (i32 8)))>; 8329} 8330 8331let Predicates = [IsLE] in { 8332def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; 8333def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 
FPR128:$src)>; 8334def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; 8335def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; 8336def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>; 8337def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; 8338def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; 8339} 8340let Predicates = [IsBE] in { 8341def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), 8342 (v2f64 (EXTv16i8 FPR128:$src, 8343 FPR128:$src, (i32 8)))>; 8344def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), 8345 (v2f64 (REV64v4i32 FPR128:$src))>; 8346def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), 8347 (v2f64 (REV64v8i16 FPR128:$src))>; 8348def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), 8349 (v2f64 (REV64v8i16 FPR128:$src))>; 8350def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), 8351 (v2f64 (REV64v8i16 FPR128:$src))>; 8352def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), 8353 (v2f64 (REV64v16i8 FPR128:$src))>; 8354def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), 8355 (v2f64 (REV64v4i32 FPR128:$src))>; 8356} 8357def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; 8358 8359let Predicates = [IsLE] in { 8360def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; 8361def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; 8362def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; 8363def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>; 8364def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; 8365def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; 8366def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; 8367} 8368let Predicates = [IsBE] in { 8369def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), 8370 (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), 8371 (REV64v4i32 FPR128:$src), (i32 8)))>; 8372def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), 8373 (v4f32 (REV32v8i16 FPR128:$src))>; 8374def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), 8375 (v4f32 (REV32v8i16 FPR128:$src))>; 8376def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), 8377 (v4f32 (REV32v8i16 FPR128:$src))>; 8378def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), 8379 (v4f32 (REV32v16i8 FPR128:$src))>; 8380def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), 8381 (v4f32 (REV64v4i32 FPR128:$src))>; 8382def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), 8383 (v4f32 (REV64v4i32 FPR128:$src))>; 8384} 8385def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; 8386 8387let Predicates = [IsLE] in { 8388def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; 8389def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; 8390def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; 8391def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; 8392def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; 8393def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; 8394def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>; 8395} 8396let Predicates = [IsBE] in { 8397def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), 8398 (v2i64 (EXTv16i8 FPR128:$src, 8399 FPR128:$src, (i32 8)))>; 8400def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), 8401 (v2i64 (REV64v4i32 FPR128:$src))>; 8402def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), 
8403 (v2i64 (REV64v8i16 FPR128:$src))>; 8404def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), 8405 (v2i64 (REV64v16i8 FPR128:$src))>; 8406def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), 8407 (v2i64 (REV64v4i32 FPR128:$src))>; 8408def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), 8409 (v2i64 (REV64v8i16 FPR128:$src))>; 8410def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), 8411 (v2i64 (REV64v8i16 FPR128:$src))>; 8412} 8413def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; 8414 8415let Predicates = [IsLE] in { 8416def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; 8417def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; 8418def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; 8419def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; 8420def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; 8421def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; 8422def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>; 8423} 8424let Predicates = [IsBE] in { 8425def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), 8426 (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), 8427 (REV64v4i32 FPR128:$src), 8428 (i32 8)))>; 8429def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), 8430 (v4i32 (REV64v4i32 FPR128:$src))>; 8431def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), 8432 (v4i32 (REV32v8i16 FPR128:$src))>; 8433def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), 8434 (v4i32 (REV32v16i8 FPR128:$src))>; 8435def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), 8436 (v4i32 (REV64v4i32 FPR128:$src))>; 8437def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), 8438 (v4i32 (REV32v8i16 FPR128:$src))>; 8439def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), 8440 (v4i32 (REV32v8i16 FPR128:$src))>; 8441} 8442def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; 8443 8444let Predicates = [IsLE] in { 8445def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; 8446def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; 8447def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; 8448def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; 8449def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; 8450def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; 8451} 8452let Predicates = [IsBE] in { 8453def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), 8454 (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8455 (REV64v8i16 FPR128:$src), 8456 (i32 8)))>; 8457def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), 8458 (v8i16 (REV64v8i16 FPR128:$src))>; 8459def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), 8460 (v8i16 (REV32v8i16 FPR128:$src))>; 8461def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), 8462 (v8i16 (REV16v16i8 FPR128:$src))>; 8463def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), 8464 (v8i16 (REV64v8i16 FPR128:$src))>; 8465def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), 8466 (v8i16 (REV32v8i16 FPR128:$src))>; 8467} 8468def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; 8469def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>; 8470 8471let Predicates = [IsLE] in { 8472def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; 8473def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; 8474def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; 
8475def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; 8476def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; 8477def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; 8478 8479def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>; 8480def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; 8481def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; 8482def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; 8483def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; 8484def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; 8485} 8486let Predicates = [IsBE] in { 8487def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), 8488 (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8489 (REV64v8i16 FPR128:$src), 8490 (i32 8)))>; 8491def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), 8492 (v8f16 (REV64v8i16 FPR128:$src))>; 8493def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), 8494 (v8f16 (REV32v8i16 FPR128:$src))>; 8495def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), 8496 (v8f16 (REV16v16i8 FPR128:$src))>; 8497def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), 8498 (v8f16 (REV64v8i16 FPR128:$src))>; 8499def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), 8500 (v8f16 (REV32v8i16 FPR128:$src))>; 8501 8502def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), 8503 (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8504 (REV64v8i16 FPR128:$src), 8505 (i32 8)))>; 8506def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), 8507 (v8bf16 (REV64v8i16 FPR128:$src))>; 8508def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), 8509 (v8bf16 (REV32v8i16 FPR128:$src))>; 8510def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), 8511 (v8bf16 (REV16v16i8 FPR128:$src))>; 8512def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), 8513 (v8bf16 (REV64v8i16 FPR128:$src))>; 8514def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), 8515 (v8bf16 (REV32v8i16 FPR128:$src))>; 8516} 8517def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; 8518def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; 8519 8520let Predicates = [IsLE] in { 8521def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; 8522def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; 8523def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; 8524def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; 8525def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; 8526def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; 8527def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; 8528def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>; 8529} 8530let Predicates = [IsBE] in { 8531def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), 8532 (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), 8533 (REV64v16i8 FPR128:$src), 8534 (i32 8)))>; 8535def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), 8536 (v16i8 (REV64v16i8 FPR128:$src))>; 8537def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), 8538 (v16i8 (REV32v16i8 FPR128:$src))>; 8539def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), 8540 (v16i8 (REV16v16i8 FPR128:$src))>; 8541def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), 8542 (v16i8 (REV64v16i8 FPR128:$src))>; 8543def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), 8544 (v16i8 (REV32v16i8 FPR128:$src))>; 8545def : Pat<(v16i8 

def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
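
// Illustrative example (editor's sketch, not part of the generated patterns):
// the scalar reduction "fadd(extractelement(%v, 0), extractelement(%v, 1))"
// on a v2f64 value matches the FADDPv2i64p pattern above and emits the single
// pairwise instruction "faddp d0, v0.2d" instead of two lane extracts plus a
// scalar fadd.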
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Prefer using the bottom lanes of addp Rn, Rn compared to
// addp extractlow(Rn), extracthigh(Rn)
def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;

def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;
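
// Illustrative expansion (editor's sketch, assumed register assignments): a
// 128-bit nontemporal store of q0 to [x0] matched by NTStore128Pat becomes
// roughly
//   mov  d1, v0.d[1]
//   stnp d0, d1, [x0]
// i.e. the single 128-bit value is split into a no-allocate store pair.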

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call restricted to the registers (x16 and x17) that are
  // allowed to tail-call to a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
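
// Illustrative example (editor's sketch, assumed source): a guaranteed tail
// call through a function pointer, e.g. "musttail call void %fp()", selects
// TCRETURNri and is ultimately emitted as a bare "br" through the chosen
// register; a direct tail call selects TCRETURNdi and becomes a plain "b".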

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy
// propagation, to reason about, so is preferred when it's possible to use it.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
              (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
                (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
                     (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                          (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;

// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
                (v4i32 (AArch64uaddv
                        (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                             (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
                (i64 0)))>;

// vaddv_[su]32 is special: with Vn == Vm it is lowered to
// "ADDP Vd.2S, Vn.2S, Vm.2S", and the result is read from Vd.s[0].
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
               SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;
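
// Illustrative example (editor's sketch, assumed C source): with FEAT_DotProd,
// the byte dot-product reduction
//   uint32_t s = 0;
//   for (int i = 0; i < 8; ++i) s += (uint32_t)a[i] * b[i];   // a, b: uint8_t*
// vectorizes to the widened multiply-add shape matched by idot_v8i8 above and
// is selected to a single "udot v0.2s, v1.8b, v2.8b" ("sdot" for the signed
// variant), followed by the addp from odot_v8i8 to combine the two
// accumulator lanes.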
0)))>; 8815 8816class odot_v16i8<Instruction DOT> : 8817 OutPatFrag<(ops node:$Vm, node:$Vn), 8818 (i32 (ADDVv4i32v 8819 (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; 8820 8821class dot_v16i8<Instruction DOT, SDPatternOperator mulop, 8822 SDPatternOperator extend> : 8823 Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn), 8824 (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>, 8825 Requires<[HasDotProd]>; 8826 8827let AddedComplexity = 10 in { 8828 def : dot_v4i8<SDOTv8i8, sextloadi8>; 8829 def : dot_v4i8<UDOTv8i8, zextloadi8>; 8830 def : dot_v8i8<SDOTv8i8, AArch64smull, sext>; 8831 def : dot_v8i8<UDOTv8i8, AArch64umull, zext>; 8832 def : dot_v16i8<SDOTv16i8, AArch64smull, sext>; 8833 def : dot_v16i8<UDOTv16i8, AArch64umull, zext>; 8834 8835 // FIXME: add patterns to generate vector by element dot product. 8836 // FIXME: add SVE dot-product patterns. 8837} 8838 8839// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, 8840// so that it can be used as input to inline asm, and vice versa. 8841def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>; 8842def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>; 8843def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, 8844 GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)), 8845 (REG_SEQUENCE GPR64x8Class, 8846 $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, 8847 $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>; 8848foreach i = 0-7 in { 8849 def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))), 8850 (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>; 8851} 8852 8853let Predicates = [HasLS64] in { 8854 def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), 8855 (outs GPR64x8:$Rt)>; 8856 def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), 8857 (outs)>; 8858 def ST64BV: Store64BV<0b011, "st64bv">; 8859 def ST64BV0: Store64BV<0b010, "st64bv0">; 8860 8861 class ST64BPattern<Intrinsic intrinsic, Instruction instruction> 8862 : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7), 8863 (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>; 8864 8865 def : ST64BPattern<int_aarch64_st64b, ST64B>; 8866 def : ST64BPattern<int_aarch64_st64bv, ST64BV>; 8867 def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>; 8868} 8869 8870let Predicates = [HasMOPS] in { 8871 let Defs = [NZCV] in { 8872 defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; 8873 8874 defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; 8875 8876 defm SETP : MOPSMemorySetInsns<0b00, "setp">; 8877 } 8878 let Uses = [NZCV] in { 8879 defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; 8880 defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; 8881 8882 defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; 8883 defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; 8884 8885 defm SETM : MOPSMemorySetInsns<0b01, "setm">; 8886 defm SETE : MOPSMemorySetInsns<0b10, "sete">; 8887 } 8888} 8889let Predicates = [HasMOPS, HasMTE] in { 8890 let Defs = [NZCV] in { 8891 defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; 8892 } 8893 let Uses = [NZCV] in { 8894 defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; 8895 // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td 8896 defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; 8897 } 8898} 8899 8900// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain 8901// MOPS 
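
// Illustrative example (editor's sketch, assumed registers): a MOPS memcpy of
// x2 bytes from [x1] to [x0] is the prologue/main/epilogue sequence
//   cpyfp [x0]!, [x1]!, x2!
//   cpyfm [x0]!, [x1]!, x2!
//   cpyfe [x0]!, [x1]!, x2!
// where the prologue communicates its progress and chosen algorithm to the
// other two parts through the updated registers and NZCV, hence the
// Defs/Uses of NZCV above.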

// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;

// MOPS operations always contain three 4-byte instructions
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                     (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                     [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}

//-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns

let Predicates = [HasPAuth] in {
def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
          (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;
def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
}

//-----------------------------------------------------------------------------

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb", (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb", (HINT 22), 1>;
}

//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW : ReadCheckWriteCompareAndSwap;

defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
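
// Illustrative example (editor's sketch, not part of the generated patterns):
// with FEAT_CSSC, scalar "smax w0, w0, w1" or "abs x0, x0" is selected
// directly from the generic smax/abs nodes above; without CSSC the same
// operations fall back to compare-plus-conditional-select (csel/csneg)
// sequences.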

def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with the register form of PRFM: when the decoder method of
  // PRFM returns Fail, the decoder should attempt to decode RPRFM instead.
  // This requires setting the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}
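
// Illustrative example (editor's sketch, assumed registers): with FEAT_LSE128,
//   swpp x0, x1, [x2]
// atomically swaps the 16-byte register pair {x0, x1} with the 128-bit value
// at [x2]; the A/L-suffixed forms add acquire/release ordering.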

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                             size  opc   opc2
  def STILPWpre:  BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:  BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:    BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:    BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  //                                        size  opc
  def STLRWpre:  BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:  BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
  def LDAPRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}

let Predicates = [HasRCPC3, HasNEON] in {
  //                                              size  opc   regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8  :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8  :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  //                              L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}
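
// Illustrative example (editor's sketch, assumed registers): with FEAT_LRCPC3,
//   ldiapp x0, x1, [x2]
// is the Load-Acquire RCpc ordered pair form, and
//   stilp x0, x1, [x2]
// the Store-Release ordered pair form, i.e. pair variants of the existing
// single-register ldapr/stlr acquire/release accesses.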

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // A custom decoder is needed because tablegen sees this instruction as
    // having only 4 fields, so the autogenerated decoder (decodeToMCInst)
    // builds an MC representation with 4 operands, whereas the printer
    // (AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc) is
    // derived from the asm template and expects 5 operands (one extra for the
    // XZR pair). Adding an explicit bits<5> xzr_pair field is not an option:
    // without a way to constrain it to 0b11111 here, the encoding would
    // overlap with the main SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//             31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS 1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR 1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc.).

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}


include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"