//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
def HasV8_0a         : Predicate<"Subtarget->hasV8_0aOps()">,
                       AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a         : Predicate<"Subtarget->hasV8_8aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a         : Predicate<"Subtarget->hasV8_9aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a         : Predicate<"Subtarget->hasV9_3aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a         : Predicate<"Subtarget->hasV9_4aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r         : Predicate<"Subtarget->hasV8_0rOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA       : Predicate<"Subtarget->hasEL2VMSA()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3           : Predicate<"Subtarget->hasEL3()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum     : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4     : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI       : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                       AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO     : Predicate<"Subtarget->hasRCPCImm()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                       AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                       AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                       AssemblerPredicateWithAll<(all_of FeatureCrypto), "crypto">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                       AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                       AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                       AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                       AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                       AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC          : Predicate<"Subtarget->hasCSSC()">,
                       AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC        : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                       AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                       AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                       AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                       AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                                 "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1        : Predicate<"Subtarget->hasSVE2p1()">,
                       AssemblerPredicate<(any_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16        : Predicate<"Subtarget->hasB16B16()">,
                       AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME           : Predicate<"Subtarget->hasSME()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64     : Predicate<"Subtarget->hasSMEF64F64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16     : Predicate<"Subtarget->hasSMEF16F16()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEI16I64     : Predicate<"Subtarget->hasSMEI16I64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2          : Predicate<"Subtarget->hasSME2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1        : Predicate<"Subtarget->hasSME2p1()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVEorSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                                "sve2 or sme">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                "neon or sme">;
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes       : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME           : Predicate<"Subtarget->hasTME()">,
                       AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE           : Predicate<"Subtarget->hasETE()">,
                       AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16          : Predicate<"Subtarget->hasBF16()">,
                       AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS            : Predicate<"Subtarget->hasXS()">,
                       AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
                       AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64          : Predicate<"Subtarget->hasLS64()">,
                       AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC           : Predicate<"Subtarget->hasHBC()">,
                       AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS          : Predicate<"Subtarget->hasMOPS()">,
                       AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB        : Predicate<"Subtarget->hasCLRBHB()">,
                       AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2      : Predicate<"Subtarget->hasSPECRES2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE           : Predicate<"Subtarget->hasITE()">,
                       AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE           : Predicate<"Subtarget->hasTHE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3         : Predicate<"Subtarget->hasRCPC3()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128        : Predicate<"Subtarget->hasLSE128()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128          : Predicate<"Subtarget->hasD128()">,
                       AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def NotInStreamingSVEMode : Predicate<"!Subtarget->forceStreamingCompatibleSVE()">;
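
// The predicates above gate both CodeGen pattern selection and assembly
// parsing/printing. A definition elsewhere in the backend is gated on one of
// them roughly like this (illustrative sketch, not a real definition from
// this file):
//
//   let Predicates = [HasLSE] in
//   def ... : ... ;   // only selectable/assemblable with +lse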

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;
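
// For reference, SDTypeProfile<NumResults, NumOperands, [constraints]>:
// e.g. SDTBinaryArithWithFlagsOut describes nodes such as AArch64ISD::ADDS
// below, which produce two results (the value and an i32 NZCV flags value)
// from two same-typed integer operands.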

def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
                                       [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                        SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel  : SDTypeProfile<1, 4,
                                     [SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>,
                                      SDTCisInt<3>,
                                      SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp   : SDTypeProfile<0, 2,
                                      [SDTCisFP<0>,
                                       SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr  : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                            SDTCisSameAs<0, 1>,
                                            SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                   SDTCisSameAs<0,1>,
                                                   SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
//
// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// The profile below takes a single operand (the variable) and has no results.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
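
// These masked load/store fragments let patterns (mainly in
// SVEInstrFormats.td) match one specific flavour of the generic
// masked_ld/masked_st nodes. A use looks roughly like this (illustrative
// sketch; the instruction name and operand order are not taken from the real
// patterns):
//
//   def : Pat<(nxv16i8 (nonext_masked_load GPR64:$base, (nxv16i1 PPR:$pg), undef)),
//             (LD1B ...)>;
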
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}
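
// Because of the NAME# prefix, each defm below expands to four fragments;
// e.g. the first one yields nonext_masked_gather_signed_scaled,
// nonext_masked_gather_signed_unscaled, nonext_masked_gather_unsigned_scaled
// and nonext_masked_gather_unsigned_unscaled.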

defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
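
// These leaves key off known-bits analysis rather than a particular opcode;
// e.g. the result of (and GPR32:$x, 0xffff) satisfies top16Zero, and a value
// zero-extended from i1 satisfies topbitsallzero32/64.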
SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, 614 [SDNPCommutative]>; 615def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; 616def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, 617 [SDNPCommutative]>; 618def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; 619def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; 620 621def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; 622def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; 623def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; 624 625def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; 626 627def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; 628def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, 629 [SDNPHasChain]>; 630def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, 631 [SDNPHasChain]>; 632def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), 633 [(AArch64strict_fcmp node:$lhs, node:$rhs), 634 (AArch64fcmp node:$lhs, node:$rhs)]>; 635 636def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; 637def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; 638def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; 639def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; 640def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; 641def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; 642 643def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; 644 645def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; 646def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; 647def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; 648def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; 649def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; 650def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; 651 652def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; 653def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; 654def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; 655def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; 656def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; 657def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; 658def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; 659 660def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; 661def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; 662def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; 663def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; 664 665def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; 666def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; 667def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; 668def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; 669def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; 670def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; 671def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; 672def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; 673def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; 

def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr      : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl  : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl  : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi      : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov      : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16     : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32     : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64     : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext       : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr     : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr     : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl      : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli    : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli    : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui   : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri    : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri    : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli      : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri      : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
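
// i.e. per lane, cmtst(a, b) is all-ones when (a & b) != 0 and zero
// otherwise, matching the semantics of the CMTST instruction.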

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisSameAs<1, 2>]>;
def AArch64pmull    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;

def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abdu node:$lhs, node:$rhs),
                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abds node:$lhs, node:$rhs),
                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp   : PatFrags<(ops node:$src),
                               [(AArch64saddlp_n node:$src),
                                (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp    : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg   : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg  : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g  : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp  : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp  : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

// Match add node and also treat an 'or' node as an 'add' if the or'ed operands
// have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   if (N->getOpcode() == ISD::ADD)
     return true;
   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
     // Only handle G_ADD for now. FIXME. build capability to compute whether
     // operands of G_OR have common bits set or not.
     return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}
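
// Example: in (or (shl $x, (i64 16)), (and $y, 0xffff)) the two operands have
// no common set bits, so the 'or' is equivalent to an 'add' and patterns
// written against add_and_or_is_add can match it.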

// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
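
// e.g. both operands of (mul (sext GPR32:$a), (sext GPR32:$b)) have 33 known
// sign bits, so the product can be formed with SMULL on the 32-bit halves.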

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to empty list because we expect these instructions to simply get
// removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                              Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                            Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                                    tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                                    tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                                    tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                                    tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                                    tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                    texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                                tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly
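
// After register allocation the MOVaddr* pseudos above are expanded back into
// the usual two-instruction PC-relative materialization, roughly:
//   adrp x0, sym
//   add  x0, x0, :lo12:sym
// where "sym" stands for the matched global/jump-table/etc. operand.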

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// In general these get lowered into a sequence of three 4-byte instructions.
// A 32-bit jump table destination actually needs only two instructions, since
// we can use the table itself as a PC-relative base; but optimization occurs
// after branch relaxation, so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;

let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
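
// The CRm operand of these barriers encodes the domain/type directly, e.g.
// "dmb ish" is DMB with CRm = 0b1011; the int_aarch64_dmb/dsb/isb intrinsics
// pass that immediate through unchanged.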

def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm        = 0b0010;
  let Inst{12}   = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0}   = 0b11;
  let Inst{9-8}  = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

}

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;

// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN       : SIMD_BFCVTN;
def BFCVTN2      : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                            (v4bf16 V64:$Rm),
                            (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorSME, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;

// sudot lane has a pattern where usdot is expected (there is no sudot).
// The second operand is used in the dup operation to repeat the indexed
// element.
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
                         string rhs_kind, RegisterOperand RegType,
                         ValueType AccumType, ValueType InputType>
      : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
                                        lhs_kind, rhs_kind, RegType, AccumType,
                                        InputType, null_frag> {
  let Pattern = [(set (AccumType RegType:$dst),
                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
                                 (InputType (bitconvert (AccumType
                                    (AArch64duplane32 (v4i32 V128:$Rm),
                                        VectorIndexS:$idx)))),
                                 (InputType RegType:$Rn))))];
}

multiclass SIMDSUDOTIndex {
  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
}

defm SUDOTlane : SIMDSUDOTIndex;

}

// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}

// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
def XAR       : CryptoRRRi6<"xar">;

class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;

class EOR3_pattern<ValueType VecTy>
  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;

class BCAX_pattern<ValueType VecTy>
  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : BCAX_pattern<v16i8>;
def : BCAX_pattern<v8i16>;
def : BCAX_pattern<v4i32>;
def : BCAX_pattern<v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;

def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
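
// (XAR rotates each 64-bit lane of (Vn ^ Vm) right by the immediate, so the
// intrinsic maps directly onto the instruction.)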

} // HasSHA3

let Predicates = [HasSM4] in {
def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;

def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;

class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;

class SM4_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;

def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;

def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
} // HasSM4

let Predicates = [HasRCPC] in {
  // v8.3 Release Consistent processor consistent (RCpc) support, optional in v8.2.
  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
}
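
// The LDAPR* definitions above are the weaker RCpc forms of load-acquire:
// unlike LDAR they need not order against earlier store-releases to other
// addresses.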
(v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; 1229 1230class SM4_pattern<Instruction INST, Intrinsic OpNode> 1231 : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1232 (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1233 1234def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>; 1235def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>; 1236 1237def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>; 1238def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>; 1239def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>; 1240def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>; 1241 1242def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>; 1243def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>; 1244} // HasSM4 1245 1246let Predicates = [HasRCPC] in { 1247 // v8.3 Release Consistent Processor Consistent support, optional in v8.2. 1248 def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; 1249 def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; 1250 def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; 1251 def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; 1252} 1253 1254// v8.3a complex add and multiply-accumulate. No predicate here, that is done 1255// inside the multiclass as the FP16 versions need different predicates. 1256defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, 1257 "fcmla", null_frag>; 1258defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, 1259 "fcadd", null_frag>; 1260defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">; 1261 1262let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1263 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1264 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; 1265 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1266 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; 1267 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1268 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; 1269 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1270 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; 1271} 1272 1273let Predicates = [HasComplxNum, HasNEON] in { 1274 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1275 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; 1276 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1277 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; 1278 foreach Ty = [v4f32, v2f64] in { 1279 def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), 1280 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; 1281 def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), 1282 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; 1283 } 1284} 1285 1286multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> { 1287 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1288 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; 1289 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1290 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; 1291 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1292 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; 1293 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty 
Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1294 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; 1295} 1296 1297multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> { 1298 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1299 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; 1300 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1301 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; 1302 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1303 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; 1304 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1305 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; 1306} 1307 1308 1309let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1310 defm : FCMLA_PATS<v4f16, V64>; 1311 defm : FCMLA_PATS<v8f16, V128>; 1312 1313 defm : FCMLA_LANE_PATS<v4f16, V64, 1314 (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; 1315 defm : FCMLA_LANE_PATS<v8f16, V128, 1316 (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; 1317} 1318let Predicates = [HasComplxNum, HasNEON] in { 1319 defm : FCMLA_PATS<v2f32, V64>; 1320 defm : FCMLA_PATS<v4f32, V128>; 1321 defm : FCMLA_PATS<v2f64, V128>; 1322 1323 defm : FCMLA_LANE_PATS<v4f32, V128, 1324 (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; 1325} 1326 1327// v8.3a Pointer Authentication 1328// These instructions inhabit part of the hint space and so can be used for 1329// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is 1330// important for compatibility with other assemblers (e.g. GAS) when building 1331// software compatible with both CPUs that do or don't implement PA. 1332let Uses = [LR], Defs = [LR] in { 1333 def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; 1334 def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; 1335 let isAuthenticated = 1 in { 1336 def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; 1337 def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; 1338 } 1339} 1340let Uses = [LR, SP], Defs = [LR] in { 1341 def PACIASP : SystemNoOperands<0b001, "hint\t#25">; 1342 def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; 1343 let isAuthenticated = 1 in { 1344 def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; 1345 def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; 1346 } 1347} 1348let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { 1349 def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; 1350 def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; 1351 let isAuthenticated = 1 in { 1352 def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; 1353 def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; 1354 } 1355} 1356 1357let Uses = [LR], Defs = [LR], CRm = 0b0000 in { 1358 def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; 1359} 1360 1361// In order to be able to write readable assembly, LLVM should accept assembly 1362// inputs that use pointer authentication mnemonics, even with PA disabled. 1363// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1364// should not emit these mnemonics unless PA is enabled. 
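// (In the InstAlias defs below, the trailing 0/1 argument is the emit
// priority: 0 keeps the alias parse-only, so the canonical "hint" spelling is
// what gets printed; 1 makes the alias preferred for printing as well.)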
1365def : InstAlias<"paciaz", (PACIAZ), 0>; 1366def : InstAlias<"pacibz", (PACIBZ), 0>; 1367def : InstAlias<"autiaz", (AUTIAZ), 0>; 1368def : InstAlias<"autibz", (AUTIBZ), 0>; 1369def : InstAlias<"paciasp", (PACIASP), 0>; 1370def : InstAlias<"pacibsp", (PACIBSP), 0>; 1371def : InstAlias<"autiasp", (AUTIASP), 0>; 1372def : InstAlias<"autibsp", (AUTIBSP), 0>; 1373def : InstAlias<"pacia1716", (PACIA1716), 0>; 1374def : InstAlias<"pacib1716", (PACIB1716), 0>; 1375def : InstAlias<"autia1716", (AUTIA1716), 0>; 1376def : InstAlias<"autib1716", (AUTIB1716), 0>; 1377def : InstAlias<"xpaclri", (XPACLRI), 0>; 1378 1379// These pointer authentication instructions require armv8.3a 1380let Predicates = [HasPAuth] in { 1381 1382 // When PA is enabled, a better mnemonic should be emitted. 1383 def : InstAlias<"paciaz", (PACIAZ), 1>; 1384 def : InstAlias<"pacibz", (PACIBZ), 1>; 1385 def : InstAlias<"autiaz", (AUTIAZ), 1>; 1386 def : InstAlias<"autibz", (AUTIBZ), 1>; 1387 def : InstAlias<"paciasp", (PACIASP), 1>; 1388 def : InstAlias<"pacibsp", (PACIBSP), 1>; 1389 def : InstAlias<"autiasp", (AUTIASP), 1>; 1390 def : InstAlias<"autibsp", (AUTIBSP), 1>; 1391 def : InstAlias<"pacia1716", (PACIA1716), 1>; 1392 def : InstAlias<"pacib1716", (PACIB1716), 1>; 1393 def : InstAlias<"autia1716", (AUTIA1716), 1>; 1394 def : InstAlias<"autib1716", (AUTIB1716), 1>; 1395 def : InstAlias<"xpaclri", (XPACLRI), 1>; 1396 1397 multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm, 1398 SDPatternOperator op> { 1399 def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>; 1400 def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>; 1401 def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>; 1402 def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>; 1403 def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>; 1404 def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>; 1405 def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>; 1406 def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>; 1407 } 1408 1409 defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; 1410 defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; 1411 1412 def XPACI : ClearAuth<0, "xpaci">; 1413 def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>; 1414 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>; 1415 1416 def XPACD : ClearAuth<1, "xpacd">; 1417 def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>; 1418 def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>; 1419 1420 def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; 1421 1422 // Combined Instructions 1423 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1424 def BRAA : AuthBranchTwoOperands<0, 0, "braa">; 1425 def BRAB : AuthBranchTwoOperands<0, 1, "brab">; 1426 } 1427 let isCall = 1, Defs = [LR], Uses = [SP] in { 1428 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; 1429 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; 1430 } 1431 1432 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1433 def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; 1434 def BRABZ : AuthOneOperand<0b000, 1, "brabz">; 1435 } 1436 let isCall = 1, Defs = [LR], Uses = [SP] in { 1437 def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; 1438 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; 1439 } 1440 1441 let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1442 def RETAA : AuthReturn<0b010, 0, "retaa">; 1443 
def RETAB : AuthReturn<0b010, 1, "retab">; 1444 def ERETAA : AuthReturn<0b100, 0, "eretaa">; 1445 def ERETAB : AuthReturn<0b100, 1, "eretab">; 1446 } 1447 1448 defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; 1449 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; 1450 1451} 1452 1453// v8.3a floating point conversion for javascript 1454let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in 1455def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, 1456 "fjcvtzs", 1457 [(set GPR32:$Rd, 1458 (int_aarch64_fjcvtzs FPR64:$Rn))]> { 1459 let Inst{31} = 0; 1460} // HasJS, HasFPARMv8 1461 1462// v8.4 Flag manipulation instructions 1463let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { 1464def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { 1465 let Inst{20-5} = 0b0000001000000000; 1466} 1467def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; 1468def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; 1469def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", 1470 "{\t$Rn, $imm, $mask}">; 1471} // HasFlagM 1472 1473// v8.5 flag manipulation instructions 1474let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { 1475 1476def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { 1477 let Inst{18-16} = 0b000; 1478 let Inst{11-8} = 0b0000; 1479 let Unpredictable{11-8} = 0b1111; 1480 let Inst{7-5} = 0b001; 1481} 1482 1483def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { 1484 let Inst{18-16} = 0b000; 1485 let Inst{11-8} = 0b0000; 1486 let Unpredictable{11-8} = 0b1111; 1487 let Inst{7-5} = 0b010; 1488} 1489} // HasAltNZCV 1490 1491 1492// Armv8.5-A speculation barrier 1493def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { 1494 let Inst{20-5} = 0b0001100110000111; 1495 let Unpredictable{11-8} = 0b1111; 1496 let Predicates = [HasSB]; 1497 let hasSideEffects = 1; 1498} 1499 1500def : InstAlias<"clrex", (CLREX 0xf)>; 1501def : InstAlias<"isb", (ISB 0xf)>; 1502def : InstAlias<"ssbb", (DSB 0)>; 1503def : InstAlias<"pssbb", (DSB 4)>; 1504def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; 1505 1506def MRS : MRSI; 1507def MSR : MSRI; 1508def MSRpstateImm1 : MSRpstateImm0_1; 1509def MSRpstateImm4 : MSRpstateImm0_15; 1510 1511def : Pat<(AArch64mrs imm:$id), 1512 (MRS imm:$id)>; 1513 1514// The thread pointer (on Linux, at least, where this has been implemented) is 1515// TPIDR_EL0. 1516def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), 1517 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; 1518 1519// This gets lowered into a 24-byte instruction sequence 1520let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { 1521def KCFI_CHECK : Pseudo< 1522 (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>; 1523} 1524 1525let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { 1526def HWASAN_CHECK_MEMACCESS : Pseudo< 1527 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1528 [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1529 Sched<[]>; 1530} 1531 1532let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { 1533def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< 1534 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1535 [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1536 Sched<[]>; 1537} 1538 1539// The virtual cycle counter register is CNTVCT_EL0. 
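// (0xdf02 is the system register encoding of CNTVCT_EL0:
//  op0=3, op1=3, CRn=c14, CRm=c0, op2=2.)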
1540def : Pat<(readcyclecounter), (MRS 0xdf02)>; 1541 1542// FPCR register 1543let Uses = [FPCR] in 1544def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), 1545 [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, 1546 PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, 1547 Sched<[WriteSys]>; 1548let Defs = [FPCR] in 1549def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), 1550 [(int_aarch64_set_fpcr i64:$val)]>, 1551 PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, 1552 Sched<[WriteSys]>; 1553 1554// Generic system instructions 1555def SYSxt : SystemXtI<0, "sys">; 1556def SYSLxt : SystemLXtI<1, "sysl">; 1557 1558def : InstAlias<"sys $op1, $Cn, $Cm, $op2", 1559 (SYSxt imm0_7:$op1, sys_cr_op:$Cn, 1560 sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 1561 1562 1563let Predicates = [HasTME] in { 1564 1565def TSTART : TMSystemI<0b0000, "tstart", 1566 [(set GPR64:$Rt, (int_aarch64_tstart))]>; 1567 1568def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; 1569 1570def TCANCEL : TMSystemException<0b011, "tcancel", 1571 [(int_aarch64_tcancel timm64_0_65535:$imm)]>; 1572 1573def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { 1574 let mayLoad = 0; 1575 let mayStore = 0; 1576} 1577} // HasTME 1578 1579//===----------------------------------------------------------------------===// 1580// Move immediate instructions. 1581//===----------------------------------------------------------------------===// 1582 1583defm MOVK : InsertImmediate<0b11, "movk">; 1584defm MOVN : MoveImmediate<0b00, "movn">; 1585 1586let PostEncoderMethod = "fixMOVZ" in 1587defm MOVZ : MoveImmediate<0b10, "movz">; 1588 1589// First group of aliases covers an implicit "lsl #0". 1590def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; 1591def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; 1592def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1593def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1594def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1595def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1596 1597// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
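// For example, "movz x0, #:abs_g3:sym" picks the g3 fragment (bits [63:48] of
// the absolute address of sym), matching the 48-bit shift in the alias below.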
1598def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1599def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1600def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1601def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1602 1603def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1604def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1605def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1606def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1607 1608def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; 1609def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; 1610def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; 1611def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; 1612 1613def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1614def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1615 1616def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1617def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1618 1619def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; 1620def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; 1621 1622// Final group of aliases covers true "mov $Rd, $imm" cases. 1623multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, 1624 int width, int shift> { 1625 def _asmoperand : AsmOperandClass { 1626 let Name = basename # width # "_lsl" # shift # "MovAlias"; 1627 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " 1628 # shift # ">"; 1629 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; 1630 } 1631 1632 def _movimm : Operand<i32> { 1633 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); 1634 } 1635 1636 def : InstAlias<"mov $Rd, $imm", 1637 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; 1638} 1639 1640defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; 1641defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; 1642 1643defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; 1644defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; 1645defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; 1646defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; 1647 1648defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; 1649defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; 1650 1651defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; 1652defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; 1653defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; 1654defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; 1655 1656let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, 1657 isAsCheapAsAMove = 1 in { 1658// FIXME: The following pseudo instructions are only needed because remat 1659// cannot handle multiple instructions. When that changes, we can select 1660// directly to the real instructions and get rid of these pseudos. 
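// For example, a MOVi64imm of an arbitrary constant is later rewritten by the
// pseudo expansion pass into a MOVZ/MOVN plus up to three MOVKs (or a single
// ORR when the constant happens to be a valid logical immediate).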
1661 1662def MOVi32imm 1663 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), 1664 [(set GPR32:$dst, imm:$src)]>, 1665 Sched<[WriteImm]>; 1666def MOVi64imm 1667 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), 1668 [(set GPR64:$dst, imm:$src)]>, 1669 Sched<[WriteImm]>; 1670} // isReMaterializable, isCodeGenOnly 1671 1672// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the 1673// eventual expansion code fewer bits to worry about getting right. Marshalling 1674// the types is a little tricky though: 1675def i64imm_32bit : ImmLeaf<i64, [{ 1676 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 1677}]>; 1678 1679def s64imm_32bit : ImmLeaf<i64, [{ 1680 int64_t Imm64 = static_cast<int64_t>(Imm); 1681 return Imm64 >= std::numeric_limits<int32_t>::min() && 1682 Imm64 <= std::numeric_limits<int32_t>::max(); 1683}]>; 1684 1685def trunc_imm : SDNodeXForm<imm, [{ 1686 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); 1687}]>; 1688 1689def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, 1690 GISDNodeXFormEquiv<trunc_imm>; 1691 1692let Predicates = [OptimizedGISelOrOtherSelector] in { 1693// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless 1694// copies. 1695def : Pat<(i64 i64imm_32bit:$src), 1696 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; 1697} 1698 1699// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 1700def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 1701return CurDAG->getTargetConstant( 1702 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 1703}]>; 1704 1705def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 1706return CurDAG->getTargetConstant( 1707 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 1708}]>; 1709 1710 1711def : Pat<(f32 fpimm:$in), 1712 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; 1713def : Pat<(f64 fpimm:$in), 1714 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; 1715 1716 1717// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 1718// sequences. 1719def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 1720 tglobaladdr:$g1, tglobaladdr:$g0), 1721 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), 1722 tglobaladdr:$g1, 16), 1723 tglobaladdr:$g2, 32), 1724 tglobaladdr:$g3, 48)>; 1725 1726def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 1727 tblockaddress:$g1, tblockaddress:$g0), 1728 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), 1729 tblockaddress:$g1, 16), 1730 tblockaddress:$g2, 32), 1731 tblockaddress:$g3, 48)>; 1732 1733def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 1734 tconstpool:$g1, tconstpool:$g0), 1735 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), 1736 tconstpool:$g1, 16), 1737 tconstpool:$g2, 32), 1738 tconstpool:$g3, 48)>; 1739 1740def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 1741 tjumptable:$g1, tjumptable:$g0), 1742 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), 1743 tjumptable:$g1, 16), 1744 tjumptable:$g2, 32), 1745 tjumptable:$g3, 48)>; 1746 1747 1748//===----------------------------------------------------------------------===// 1749// Arithmetic instructions. 1750//===----------------------------------------------------------------------===// 1751 1752// Add/subtract with carry. 
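// adc{s} computes Rn + Rm + PSTATE.C; the ngc{s} aliases below express a
// negate-with-borrow by substituting the zero register for the first source.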
1753defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 1754defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 1755 1756def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 1757def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 1758def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 1759def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 1760 1761// Add/subtract 1762defm ADD : AddSub<0, "add", "sub", add>; 1763defm SUB : AddSub<1, "sub", "add">; 1764 1765def : InstAlias<"mov $dst, $src", 1766 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 1767def : InstAlias<"mov $dst, $src", 1768 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 1769def : InstAlias<"mov $dst, $src", 1770 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 1771def : InstAlias<"mov $dst, $src", 1772 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 1773 1774defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; 1775defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; 1776 1777def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ 1778 return N->getOpcode() == ISD::CopyFromReg && 1779 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; 1780}]>; 1781 1782// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 1783def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 1784 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 1785def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 1786 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 1787def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 1788 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 1789def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 1790 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 1791def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 1792 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 1793def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 1794 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 1795let AddedComplexity = 1 in { 1796def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), 1797 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; 1798def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), 1799 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; 1800def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), 1801 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; 1802} 1803 1804// Because of the immediate format for add/sub-imm instructions, the 1805// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1806// These patterns capture that transformation. 1807let AddedComplexity = 1 in { 1808def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1809 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1810def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1811 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1812def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1813 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1814def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1815 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1816} 1817 1818// Because of the immediate format for add/sub-imm instructions, the 1819// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1820// These patterns capture that transformation. 
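// For example, (AArch64add_flag x, -1) is selected as SUBS{W,X}ri x, 1 by the
// patterns below, mirroring the non-flag-setting group above.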
1821let AddedComplexity = 1 in { 1822def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1823 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1824def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1825 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1826def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1827 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1828def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1829 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1830} 1831 1832def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1833def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1834def : InstAlias<"neg $dst, $src$shift", 1835 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1836def : InstAlias<"neg $dst, $src$shift", 1837 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1838 1839def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1840def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1841def : InstAlias<"negs $dst, $src$shift", 1842 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1843def : InstAlias<"negs $dst, $src$shift", 1844 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1845 1846 1847// Unsigned/Signed divide 1848defm UDIV : Div<0, "udiv", udiv>; 1849defm SDIV : Div<1, "sdiv", sdiv>; 1850 1851def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; 1852def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; 1853def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; 1854def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; 1855 1856// Variable shift 1857defm ASRV : Shift<0b10, "asr", sra>; 1858defm LSLV : Shift<0b00, "lsl", shl>; 1859defm LSRV : Shift<0b01, "lsr", srl>; 1860defm RORV : Shift<0b11, "ror", rotr>; 1861 1862def : ShiftAlias<"asrv", ASRVWr, GPR32>; 1863def : ShiftAlias<"asrv", ASRVXr, GPR64>; 1864def : ShiftAlias<"lslv", LSLVWr, GPR32>; 1865def : ShiftAlias<"lslv", LSLVXr, GPR64>; 1866def : ShiftAlias<"lsrv", LSRVWr, GPR32>; 1867def : ShiftAlias<"lsrv", LSRVXr, GPR64>; 1868def : ShiftAlias<"rorv", RORVWr, GPR32>; 1869def : ShiftAlias<"rorv", RORVXr, GPR64>; 1870 1871// Multiply-add 1872let AddedComplexity = 5 in { 1873defm MADD : MulAccum<0, "madd">; 1874defm MSUB : MulAccum<1, "msub">; 1875 1876def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), 1877 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1878def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), 1879 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1880 1881def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), 1882 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1883def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), 1884 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1885def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), 1886 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1887def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), 1888 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1889} // AddedComplexity = 5 1890 1891let AddedComplexity = 5 in { 1892def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; 1893def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; 1894def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; 1895def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; 1896 1897def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), 1898 (SMADDLrrr 
(EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1899def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), 1900 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1901def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), 1902 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1903def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), 1904 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1905def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), 1906 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1907def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), 1908 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1909 1910def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), 1911 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1912def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), 1913 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1914 1915def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), 1916 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1917def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), 1918 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1919def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), 1920 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1921 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1922 1923def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 1924 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1925def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 1926 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1927def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), 1928 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1929 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1930 1931def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), 1932 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1933def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), 1934 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1935def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), 1936 GPR64:$Ra)), 1937 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1938 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1939 1940def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 1941 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1942def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 1943 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1944def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), 1945 (s64imm_32bit:$C)))), 1946 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1947 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1948 1949def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)), 1950 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1951def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))), 1952 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1953 1954def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)), 1955 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1956def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)), 1957 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1958 1959def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 1960 
(SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1961def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 1962 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1963 1964def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 1965 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1966def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 1967 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1968 1969def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), 1970 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1971def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), 1972 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1973 1974def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), 1975 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1976def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), 1977 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1978 1979def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), 1980 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1981def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 1982 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1983 1984def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), 1985 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1986def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 1987 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1988} // AddedComplexity = 5 1989 1990def : MulAccumWAlias<"mul", MADDWrrr>; 1991def : MulAccumXAlias<"mul", MADDXrrr>; 1992def : MulAccumWAlias<"mneg", MSUBWrrr>; 1993def : MulAccumXAlias<"mneg", MSUBXrrr>; 1994def : WideMulAccumAlias<"smull", SMADDLrrr>; 1995def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 1996def : WideMulAccumAlias<"umull", UMADDLrrr>; 1997def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 1998 1999// Multiply-high 2000def SMULHrr : MulHi<0b010, "smulh", mulhs>; 2001def UMULHrr : MulHi<0b110, "umulh", mulhu>; 2002 2003// CRC32 2004def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 2005def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 2006def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 2007def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 2008 2009def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 2010def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 2011def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 2012def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; 2013 2014// v8.1 atomic CAS 2015defm CAS : CompareAndSwap<0, 0, "">; 2016defm CASA : CompareAndSwap<1, 0, "a">; 2017defm CASL : CompareAndSwap<0, 1, "l">; 2018defm CASAL : CompareAndSwap<1, 1, "al">; 2019 2020// v8.1 atomic CASP 2021defm CASP : CompareAndSwapPair<0, 0, "">; 2022defm CASPA : CompareAndSwapPair<1, 0, "a">; 2023defm CASPL : CompareAndSwapPair<0, 1, "l">; 2024defm CASPAL : CompareAndSwapPair<1, 1, "al">; 2025 2026// v8.1 atomic SWP 2027defm SWP : Swap<0, 0, "">; 2028defm SWPA : Swap<1, 0, "a">; 2029defm SWPL : Swap<0, 1, "l">; 2030defm SWPAL : Swap<1, 1, "al">; 2031 2032// v8.1 atomic LD<OP>(register). 
Performs load and then ST<OP>(register) 2033defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; 2034defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; 2035defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; 2036defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; 2037 2038defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; 2039defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; 2040defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; 2041defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; 2042 2043defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; 2044defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; 2045defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; 2046defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; 2047 2048defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; 2049defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; 2050defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; 2051defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; 2052 2053defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; 2054defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; 2055defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; 2056defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; 2057 2058defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; 2059defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; 2060defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; 2061defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; 2062 2063defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; 2064defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; 2065defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; 2066defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; 2067 2068defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; 2069defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; 2070defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; 2071defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; 2072 2073// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" 2074defm : STOPregister<"stadd","LDADD">; // STADDx 2075defm : STOPregister<"stclr","LDCLR">; // STCLRx 2076defm : STOPregister<"steor","LDEOR">; // STEORx 2077defm : STOPregister<"stset","LDSET">; // STSETx 2078defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx 2079defm : STOPregister<"stsmin","LDSMIN">;// STSMINx 2080defm : STOPregister<"stumax","LDUMAX">;// STUMAXx 2081defm : STOPregister<"stumin","LDUMIN">;// STUMINx 2082 2083// v8.5 Memory Tagging Extension 2084let Predicates = [HasMTE] in { 2085 2086def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", 2087 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; 2088 2089def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", 2090 int_aarch64_gmi, GPR64sp>, Sched<[]> { 2091 let isNotDuplicable = 1; 2092} 2093def ADDG : AddSubG<0, "addg", null_frag>; 2094def SUBG : AddSubG<1, "subg", null_frag>; 2095 2096def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; 2097 2098def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; 2099def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ 2100 let Defs = [NZCV]; 2101} 2102 2103def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; 2104 2105def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; 2106 2107def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), 2108 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; 2109def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2110 (LDG GPR64:$Rt, 
GPR64sp:$Rn, simm9s16:$offset)>; 2111 2112def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; 2113 2114def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", 2115 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; 2116def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", 2117 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; 2118def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", 2119 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { 2120 let Inst{23} = 0; 2121} 2122 2123defm STG : MemTagStore<0b00, "stg">; 2124defm STZG : MemTagStore<0b01, "stzg">; 2125defm ST2G : MemTagStore<0b10, "st2g">; 2126defm STZ2G : MemTagStore<0b11, "stz2g">; 2127 2128def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2129 (STGOffset $Rn, $Rm, $imm)>; 2130def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2131 (STZGOffset $Rn, $Rm, $imm)>; 2132def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2133 (ST2GOffset $Rn, $Rm, $imm)>; 2134def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2135 (STZ2GOffset $Rn, $Rm, $imm)>; 2136 2137defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; 2138def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; 2139def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; 2140 2141def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2142 (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2143 2144def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), 2145 (STGPi $Rt, $Rt2, $Rn, $imm)>; 2146 2147def IRGstack 2148 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, 2149 Sched<[]>; 2150def TAGPstack 2151 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, 2152 Sched<[]>; 2153 2154// Explicit SP in the first operand prevents ShrinkWrap optimization 2155// from leaving this instruction out of the stack frame. When IRGstack 2156// is transformed into IRG, this operand is replaced with the actual 2157// register / expression for the tagged base pointer of the current function. 2158def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; 2159 2160// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. 2161// $Rn_wback is one past the end of the range. $Rm is the loop counter. 2162let isCodeGenOnly=1, mayStore=1 in { 2163def STGloop_wback 2164 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2165 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2166 Sched<[WriteAdr, WriteST]>; 2167 2168def STZGloop_wback 2169 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2170 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2171 Sched<[WriteAdr, WriteST]>; 2172 2173// Variants of the above where $Rn2 is an independent register not tied to the input register $Rn. 2174// Their purpose is to allow using a FrameIndex operand as $Rn (which of course cannot be written back). 
2175def STGloop 2176 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2177 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2178 Sched<[WriteAdr, WriteST]>; 2179 2180def STZGloop 2181 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2182 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2183 Sched<[WriteAdr, WriteST]>; 2184} 2185 2186} // Predicates = [HasMTE] 2187 2188//===----------------------------------------------------------------------===// 2189// Logical instructions. 2190//===----------------------------------------------------------------------===// 2191 2192// (immediate) 2193defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; 2194defm AND : LogicalImm<0b00, "and", and, "bic">; 2195defm EOR : LogicalImm<0b10, "eor", xor, "eon">; 2196defm ORR : LogicalImm<0b01, "orr", or, "orn">; 2197 2198// FIXME: these aliases *are* canonical sometimes (when movz can't be 2199// used). Actually, it seems to be working right now, but putting logical_immXX 2200// here is a bit dodgy on the AsmParser side too. 2201def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, 2202 logical_imm32:$imm), 0>; 2203def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, 2204 logical_imm64:$imm), 0>; 2205 2206 2207// (register) 2208defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; 2209defm BICS : LogicalRegS<0b11, 1, "bics", 2210 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; 2211defm AND : LogicalReg<0b00, 0, "and", and>; 2212defm BIC : LogicalReg<0b00, 1, "bic", 2213 BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>; 2214defm EON : LogicalReg<0b10, 1, "eon", 2215 BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; 2216defm EOR : LogicalReg<0b10, 0, "eor", xor>; 2217defm ORN : LogicalReg<0b01, 1, "orn", 2218 BinOpFrag<(or node:$LHS, (not node:$RHS))>>; 2219defm ORR : LogicalReg<0b01, 0, "orr", or>; 2220 2221def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; 2222def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; 2223 2224def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; 2225def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; 2226 2227def : InstAlias<"mvn $Wd, $Wm$sh", 2228 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; 2229def : InstAlias<"mvn $Xd, $Xm$sh", 2230 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; 2231 2232def : InstAlias<"tst $src1, $src2", 2233 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; 2234def : InstAlias<"tst $src1, $src2", 2235 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; 2236 2237def : InstAlias<"tst $src1, $src2", 2238 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; 2239def : InstAlias<"tst $src1, $src2", 2240 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; 2241 2242def : InstAlias<"tst $src1, $src2$sh", 2243 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; 2244def : InstAlias<"tst $src1, $src2$sh", 2245 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; 2246 2247 2248def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; 2249def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; 2250 2251 2252//===----------------------------------------------------------------------===// 2253// One operand data processing instructions. 
2254//===----------------------------------------------------------------------===// 2255 2256defm CLS : OneOperandData<0b000101, "cls">; 2257defm CLZ : OneOperandData<0b000100, "clz", ctlz>; 2258defm RBIT : OneOperandData<0b000000, "rbit", bitreverse>; 2259 2260def REV16Wr : OneWRegData<0b000001, "rev16", 2261 UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; 2262def REV16Xr : OneXRegData<0b000001, "rev16", null_frag>; 2263 2264def : Pat<(cttz GPR32:$Rn), 2265 (CLZWr (RBITWr GPR32:$Rn))>; 2266def : Pat<(cttz GPR64:$Rn), 2267 (CLZXr (RBITXr GPR64:$Rn))>; 2268def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), 2269 (i32 1))), 2270 (CLSWr GPR32:$Rn)>; 2271def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), 2272 (i64 1))), 2273 (CLSXr GPR64:$Rn)>; 2274def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>; 2275def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>; 2276 2277// Unlike the other one operand instructions, the instructions with the "rev" 2278// mnemonic do *not* just differ in the size bit, but actually use different 2279// opcode bits for the different sizes. 2280def REVWr : OneWRegData<0b000010, "rev", bswap>; 2281def REVXr : OneXRegData<0b000011, "rev", bswap>; 2282def REV32Xr : OneXRegData<0b000010, "rev32", 2283 UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; 2284 2285def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; 2286 2287// The bswap commutes with the rotr so we want a pattern for both possible 2288// orders. 2289def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; 2290def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; 2291 2292// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero. 2293def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>; 2294def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>; 2295 2296def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)), 2297 (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))), 2298 (REV16Xr GPR64:$Rn)>; 2299 2300//===----------------------------------------------------------------------===// 2301// Bitfield immediate extraction instruction. 2302//===----------------------------------------------------------------------===// 2303let hasSideEffects = 0 in 2304defm EXTR : ExtractImm<"extr">; 2305def : InstAlias<"ror $dst, $src, $shift", 2306 (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; 2307def : InstAlias<"ror $dst, $src, $shift", 2308 (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; 2309 2310def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), 2311 (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; 2312def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), 2313 (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; 2314 2315//===----------------------------------------------------------------------===// 2316// Other bitfield immediate instructions. 
2317//===----------------------------------------------------------------------===// 2318let hasSideEffects = 0 in { 2319defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; 2320defm SBFM : BitfieldImm<0b00, "sbfm">; 2321defm UBFM : BitfieldImm<0b10, "ubfm">; 2322} 2323 2324def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2325 uint64_t enc = (32 - N->getZExtValue()) & 0x1f; 2326 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2327}]>; 2328 2329def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2330 uint64_t enc = 31 - N->getZExtValue(); 2331 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2332}]>; 2333 2334// min(7, 31 - shift_amt) 2335def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2336 uint64_t enc = 31 - N->getZExtValue(); 2337 enc = enc > 7 ? 7 : enc; 2338 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2339}]>; 2340 2341// min(15, 31 - shift_amt) 2342def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2343 uint64_t enc = 31 - N->getZExtValue(); 2344 enc = enc > 15 ? 15 : enc; 2345 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2346}]>; 2347 2348def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2349 uint64_t enc = (64 - N->getZExtValue()) & 0x3f; 2350 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2351}]>; 2352 2353def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2354 uint64_t enc = 63 - N->getZExtValue(); 2355 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2356}]>; 2357 2358// min(7, 63 - shift_amt) 2359def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2360 uint64_t enc = 63 - N->getZExtValue(); 2361 enc = enc > 7 ? 7 : enc; 2362 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2363}]>; 2364 2365// min(15, 63 - shift_amt) 2366def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2367 uint64_t enc = 63 - N->getZExtValue(); 2368 enc = enc > 15 ? 15 : enc; 2369 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2370}]>; 2371 2372// min(31, 63 - shift_amt) 2373def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ 2374 uint64_t enc = 63 - N->getZExtValue(); 2375 enc = enc > 31 ? 
31 : enc; 2376 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2377}]>; 2378 2379def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), 2380 (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 2381 (i64 (i32shift_b imm0_31:$imm)))>; 2382def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), 2383 (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 2384 (i64 (i64shift_b imm0_63:$imm)))>; 2385 2386let AddedComplexity = 10 in { 2387def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), 2388 (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2389def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), 2390 (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2391} 2392 2393def : InstAlias<"asr $dst, $src, $shift", 2394 (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2395def : InstAlias<"asr $dst, $src, $shift", 2396 (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2397def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2398def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2399def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2400def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2401def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2402 2403def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), 2404 (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2405def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), 2406 (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2407 2408def : InstAlias<"lsr $dst, $src, $shift", 2409 (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2410def : InstAlias<"lsr $dst, $src, $shift", 2411 (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2412def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2413def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2414def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2415def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2416def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2417 2418//===----------------------------------------------------------------------===// 2419// Conditional comparison instructions. 2420//===----------------------------------------------------------------------===// 2421defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; 2422defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; 2423 2424//===----------------------------------------------------------------------===// 2425// Conditional select instructions. 
2426//===----------------------------------------------------------------------===// 2427defm CSEL : CondSelect<0, 0b00, "csel">; 2428 2429def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; 2430defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; 2431defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; 2432defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; 2433 2434def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2435 (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2436def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2437 (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2438def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2439 (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2440def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2441 (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2442def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2443 (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2444def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2445 (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2446 2447def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), 2448 (CSINCWr WZR, WZR, (i32 imm:$cc))>; 2449def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), 2450 (CSINCXr XZR, XZR, (i32 imm:$cc))>; 2451def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), 2452 (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2453def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), 2454 (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2455def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), 2456 (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2457def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), 2458 (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2459def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), 2460 (CSINVWr WZR, WZR, (i32 imm:$cc))>; 2461def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), 2462 (CSINVXr XZR, XZR, (i32 imm:$cc))>; 2463def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), 2464 (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2465def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), 2466 (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2467def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), 2468 (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2469def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), 2470 (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2471 2472def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2473 (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>; 2474def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2475 (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; 2476 2477def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2478 (CSINCWr GPR32:$val, WZR, imm:$cc)>; 2479def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), 2480 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2481def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2482 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2483 2484def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2485 (CSELWr WZR, GPR32:$val, imm:$cc)>; 2486def : Pat<(and (topbitsallzero64:$val), (AArch64csel 
def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
          (CSELWr WZR, GPR32:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;

// The aliased instruction uses the inverse of the condition code written in
// the alias. The parser already inverts the condition code for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel,
                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
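// Typical use of the pair above (illustrative comment only): ADRP
// materializes the 4KiB page address of a symbol within +/-4GiB of the PC,
// and the low 12 bits are supplied by a follow-up instruction, e.g.
//   adrp x0, sym
//   add  x0, x0, :lo12:sym
// while ADR reaches +/-1MiB in a single instruction.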
//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can hasSideEffects be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;
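// For reference, the 16-byte sequence TLSDESC_CALLSEQ expands to is the
// conventional AArch64 TLS-descriptor call (illustrative comment; the exact
// emission happens later, during lowering):
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, :tlsdesc_lo12:var]
//   add  x0, x0, :tlsdesc_lo12:var
//   .tlsdesccall var
//   blr  x1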
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
def BRK : ExceptionGeneration<0b001, 0b00, "brk",
                              [(int_aarch64_break timm32_0_65535:$imm)]>;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;

def UDF : UDFType<0, "udf">;
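// Illustrative use of BRK (comment only): the 16-bit immediate is a payload
// available to a debugger or signal handler, e.g.
//   brk #0x1      ; how a trap is conventionally encoded
//   brk #0xf000   ; immediate conventionally used for __builtin_debugtrap()
// Which immediates are meaningful is an ABI/tooling convention, not an
// architectural requirement.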
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,  1, 0b01, FPR8Op,  "ldr", untyped, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
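// Register-offset addressing in action (illustrative comment only): the
// optional extend/shift on the index register folds a scaled array access
// into the load itself, e.g.
//   ldr x0, [x1, x2, lsl #3]    ; x0 = *(i64*)(x1 + (x2 << 3))
//   ldr w0, [x1, w2, sxtw #2]   ; index is a sign-extended 32-bit value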
// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;


def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all 64-bit-wide loads whose type is compatible with FPR64.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  defm : VecROLoadPat<ro64, v2i32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,   LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;
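// Illustrative effect of the patterns above (comment only): in little-endian
// mode a 64-bit vector load is just an FP/SIMD LDR, e.g. for v4i16:
//   ldr d0, [x0, x1, lsl #3]
// In big-endian mode the in-register element order produced by LDR differs
// from LD1, so these mappings are restricted to IsLE and big-endian code
// uses LD1 instead.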
// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,  LDRBBroW, LDRBBroX>;
}


// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8,  zextloadi1, LDRBBroW, LDRBBroX>;
}
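// Why SUBREG_TO_REG with 0 is enough above (comment only): an AArch64 write
// to a W register architecturally zeroes bits [63:32] of the enclosing X
// register, so a 32-bit (or narrower zero-extending) load already produces
// the i64 zero-extension for free; no explicit UXTW is needed.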
//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                   [(set (f128 FPR128Op:$Rt),
                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
             (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
             (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
             (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
             (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
             (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
             (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
             (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
             (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
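// Scaled-offset arithmetic for the am_indexed* modes above (comment only):
// the 12-bit unsigned immediate is scaled by the access size, so an 8-byte
// load reaches byte offsets 0..32760 in steps of 8, e.g.
//   ldr x0, [x1, #32760]   ; encodable: 32760 = 4095 * 8
//   ldr x0, [x1, #4]       ; not a multiple of 8, so handled as LDUR instead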
// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                    [(set GPR64:$Rt,
                          (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                     uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                          (am_indexed64 GPR64sp:$Rn,
                                                        uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
                        [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
                        [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
                        [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
                        [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
                        [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
                         [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
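// Why alignedglobal insists on 4-byte alignment (comment only): the literal
// form of LDR encodes a 19-bit offset scaled by 4, giving a +/-1MiB
// PC-relative range in word-sized steps, e.g.
//   ldr x0, .Lconst   ; .Lconst must be 4-byte aligned and within +/-1MiB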
//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                          [(set GPR64z:$Rt,
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                          [(set GPR32z:$Rt,
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                          [(set FPR8Op:$Rt,
                                (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                          [(set (f16 FPR16Op:$Rt),
                                (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                          [(set (f32 FPR32Op:$Rt),
                                (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                          [(set (f64 FPR64Op:$Rt),
                                (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                          [(set (f128 FPR128Op:$Rt),
                                (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
                   [(set GPR32:$Rt,
                         (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
                   [(set GPR32:$Rt,
                         (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;


//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand   : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand  : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand  : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand  : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
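// Fallback aliases in action (illustrative comment only): both of these are
// accepted as "ldr" by the assembler but encode as LDUR, because the offset
// is negative or not a multiple of the access size:
//   ldr x0, [x1, #-8]   ; encodes as: ldur x0, [x1, #-8]
//   ldr w0, [x1, #3]    ; encodes as: ldur w0, [x1, #3]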
// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                   [(set GPR32:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                   [(set GPR64:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                   [(set GPR64:$Rt,
                         (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                              [(AArch64Prefetch timm:$Rt,
                                                (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
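// Writeback addressing illustrated (comment only): pre-indexed forms update
// the base register before the access, post-indexed forms after it:
//   ldr x0, [x1, #8]!   ; x1 += 8, then x0 = *x1
//   ldr x0, [x1], #8    ; x0 = *x1, then x1 += 8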
//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
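// Typical pair usage enabled above (illustrative comment only): prologue and
// epilogue save/restore with writeback, e.g.
//   stp x29, x30, [sp, #-16]!   ; pre-indexed store pair
//   ldp x29, x30, [sp], #16     ; post-indexed load pair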
//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;

// Floating-point
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op,  "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16,     store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32,     store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64,     store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}
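// Effect of the truncating-store patterns above (comment only): storing the
// low half of an i64 simply stores the W view of the same register, e.g.
//   str w0, [x1, x2]    ; truncstorei32 of the i64 held in x0
// EXTRACT_SUBREG(..., sub_32) is a register-class change, not a real
// instruction.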
multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8,   FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16,  FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128.
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16,  FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16, store,         v8f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4f32, f32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2f64, f64, dsub, STRDroW, STRDroX>;
}
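// Why the lane-0 store patterns work (comment only): lane 0 of a NEON vector
// occupies the low bits of the Q register, i.e. its b/h/s/d subregister, so
// extracting lane 0 and storing it is just a scalar FP store:
//   str s0, [x0]        ; stores lane 0 of v0.4s, no st1 {v0.s}[0] needed
// This holds for lane 0 only; other lanes use ST1 (single structure) forms.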
//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                     [(store GPR64z:$Rt,
                             (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                     [(store GPR32z:$Rt,
                             (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                      [(truncstorei16 GPR32z:$Rt,
                                      (am_indexed16 GPR64sp:$Rn,
                                                    uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
                      [(truncstorei8 GPR32z:$Rt,
                                     (am_indexed8 GPR64sp:$Rn,
                                                  uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16, store,         v8f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4f32, f32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2f64, f64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                           [(store GPR64z:$Rt,
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                           [(store GPR32z:$Rt,
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                           [(store FPR8Op:$Rt,
                                   (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                           [(store (f16 FPR16Op:$Rt),
                                   (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                           [(store (f32 FPR32Op:$Rt),
                                   (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                           [(store (f64 FPR64Op:$Rt),
                                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                           [(store (f128 FPR128Op:$Rt),
                                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                            [(truncstorei16 GPR32z:$Rt,
                                            (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                            [(truncstorei8 GPR32z:$Rt,
                                           (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// Armv8.4 Weaker Release Consistency enhancements:
// LDAPR & STLR with Immediate Offset instructions.
let Predicates = [HasRCPC_IMMO] in {
defm STLURB    : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH    : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW    : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX    : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB   : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH   : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR    : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW  : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX   : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}
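// Illustrative use of the RCPC-immediate forms above (comment only): they
// combine acquire/release semantics with an unscaled signed offset, avoiding
// a separate address computation, e.g.
//   ldapur w0, [x1, #-4]   ; load-acquire (RCpc) from x1 - 4
//   stlur  w0, [x1, #12]   ; store-release to x1 + 12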
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
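//
// The same lane-0 trick for the unscaled addressing mode: a negative offset
// has no scaled-STR encoding, so STUR is selected instead (a hypothetical C
// sketch with ACLE intrinsics, not part of this file):
//
//   #include <arm_neon.h>
//   void store_lane0_neg(float32x4_t v, float *p) {
//     p[-1] = vgetq_lane_f32(v, 0);  // expected to select: stur s0, [x0, #-4]
//   }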
3804multiclass VecStoreULane0Pat<SDPatternOperator StoreOp, 3805 ValueType VTy, ValueType STy, 3806 SubRegIndex SubRegIdx, Instruction STR> { 3807 defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>; 3808} 3809 3810let AddedComplexity = 19 in { 3811 defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>; 3812 defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>; 3813 defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>; 3814 defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>; 3815 defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>; 3816 defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>; 3817} 3818 3819//--- 3820// STR mnemonics fall back to STUR for negative or unaligned offsets. 3821def : InstAlias<"str $Rt, [$Rn, $offset]", 3822 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3823def : InstAlias<"str $Rt, [$Rn, $offset]", 3824 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3825def : InstAlias<"str $Rt, [$Rn, $offset]", 3826 (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3827def : InstAlias<"str $Rt, [$Rn, $offset]", 3828 (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3829def : InstAlias<"str $Rt, [$Rn, $offset]", 3830 (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3831def : InstAlias<"str $Rt, [$Rn, $offset]", 3832 (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3833def : InstAlias<"str $Rt, [$Rn, $offset]", 3834 (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 3835 3836def : InstAlias<"strb $Rt, [$Rn, $offset]", 3837 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3838def : InstAlias<"strh $Rt, [$Rn, $offset]", 3839 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3840 3841//--- 3842// (unscaled immediate, unprivileged) 3843defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; 3844defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; 3845 3846defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; 3847defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; 3848 3849//--- 3850// (immediate pre-indexed) 3851def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; 3852def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; 3853def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>; 3854def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; 3855def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; 3856def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; 3857def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; 3858 3859def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; 3860def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; 3861 3862// truncstore i64 3863def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3864 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3865 simm9:$off)>; 3866def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3867 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3868 simm9:$off)>; 3869def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3870 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3871 simm9:$off)>; 3872 3873def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3874 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3875def : 
Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3876 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3877def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3878 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3879def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3880 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3881def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3882 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3883def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3884 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3885def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3886 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3887 3888def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3889 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3890def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3891 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3892def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3893 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3894def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3895 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3896def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3897 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3898def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3899 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3900def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3901 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3902 3903//--- 3904// (immediate post-indexed) 3905def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>; 3906def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>; 3907def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>; 3908def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>; 3909def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>; 3910def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>; 3911def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>; 3912 3913def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>; 3914def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>; 3915 3916// truncstore i64 3917def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3918 (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3919 simm9:$off)>; 3920def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3921 (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3922 simm9:$off)>; 3923def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3924 (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3925 simm9:$off)>; 3926 3927def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), 3928 (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; 3929 3930def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3931 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3932def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3933 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3934def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3935 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3936def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3937 (STRDpost FPR64:$Rt, 
GPR64sp:$addr, simm9:$off)>; 3938def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3939 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3940def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3941 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3942def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3943 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3944def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3945 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3946 3947def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3948 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3949def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3950 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3951def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3952 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3953def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3954 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3955def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3956 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3957def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3958 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3959def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3960 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3961def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3962 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3963 3964//===----------------------------------------------------------------------===// 3965// Load/store exclusive instructions. 3966//===----------------------------------------------------------------------===// 3967 3968def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; 3969def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; 3970def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; 3971def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; 3972 3973def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; 3974def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; 3975def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; 3976def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; 3977 3978def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; 3979def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; 3980def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; 3981def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; 3982 3983def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; 3984def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; 3985def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; 3986def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; 3987 3988/* 3989Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn 3990of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an 3991alias for the case of immediate #0. This is because new STLR versions (from 3992LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not 3993appropriate anymore (it parses and discards the optional zero). This is not the 3994case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed, 3995and the immediate values are not inside the [] brackets and thus not accepted 3996by GPR64sp0 parser. 
3997*/ 3998def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW GPR32: $Rt, GPR64sp:$Rn)>; 3999def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX GPR64: $Rt, GPR64sp:$Rn)>; 4000def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32: $Rt, GPR64sp:$Rn)>; 4001def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32: $Rt, GPR64sp:$Rn)>; 4002 4003def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; 4004def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; 4005def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; 4006def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; 4007 4008def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; 4009def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; 4010def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; 4011def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; 4012 4013def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; 4014def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; 4015 4016def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; 4017def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; 4018 4019def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; 4020def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; 4021 4022def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; 4023def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; 4024 4025let Predicates = [HasLOR] in { 4026 // v8.1a "Limited Order Region" extension load-acquire instructions 4027 def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; 4028 def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; 4029 def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; 4030 def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; 4031 4032 // v8.1a "Limited Order Region" extension store-release instructions 4033 def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; 4034 def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; 4035 def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; 4036 def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; 4037 4038 // Aliases for when offset=0 4039 def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRW GPR32: $Rt, GPR64sp:$Rn)>; 4040 def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRX GPR64: $Rt, GPR64sp:$Rn)>; 4041 def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32: $Rt, GPR64sp:$Rn)>; 4042 def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32: $Rt, GPR64sp:$Rn)>; 4043} 4044 4045//===----------------------------------------------------------------------===// 4046// Scaled floating point to integer conversion instructions. 
4047//===----------------------------------------------------------------------===// 4048 4049defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; 4050defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; 4051defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; 4052defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; 4053defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; 4054defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; 4055defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; 4056defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; 4057defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4058defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4059defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4060defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4061 4062// AArch64's FCVT instructions saturate when out of range. 4063multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> { 4064 let Predicates = [HasFullFP16] in { 4065 def : Pat<(i32 (to_int_sat f16:$Rn, i32)), 4066 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4067 def : Pat<(i64 (to_int_sat f16:$Rn, i64)), 4068 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4069 } 4070 def : Pat<(i32 (to_int_sat f32:$Rn, i32)), 4071 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4072 def : Pat<(i64 (to_int_sat f32:$Rn, i64)), 4073 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4074 def : Pat<(i32 (to_int_sat f64:$Rn, i32)), 4075 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4076 def : Pat<(i64 (to_int_sat f64:$Rn, i64)), 4077 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4078 4079 let Predicates = [HasFullFP16] in { 4080 def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), 4081 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4082 def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), 4083 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4084 } 4085 def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), 4086 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4087 def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), 4088 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4089 def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), 4090 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4091 def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), 4092 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4093} 4094 4095defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">; 4096defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">; 4097 4098multiclass FPToIntegerIntPats<Intrinsic round, string INST> { 4099 let Predicates = [HasFullFP16] in { 4100 def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>; 4101 def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>; 4102 } 4103 def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>; 4104 def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>; 4105 def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>; 4106 def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>; 4107 4108 let Predicates = [HasFullFP16] in { 4109 def : Pat<(i32 (round (fmul f16:$Rn, 
fixedpoint_f16_i32:$scale))), 4110 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4111 def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), 4112 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4113 } 4114 def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), 4115 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4116 def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), 4117 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4118 def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), 4119 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4120 def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), 4121 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4122} 4123 4124defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">; 4125defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">; 4126 4127multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> { 4128 def : Pat<(i32 (to_int (round f32:$Rn))), 4129 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4130 def : Pat<(i64 (to_int (round f32:$Rn))), 4131 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4132 def : Pat<(i32 (to_int (round f64:$Rn))), 4133 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4134 def : Pat<(i64 (to_int (round f64:$Rn))), 4135 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4136 4137 // These instructions saturate like fp_to_[su]int_sat. 4138 let Predicates = [HasFullFP16] in { 4139 def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), 4140 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4141 def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), 4142 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4143 } 4144 def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), 4145 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4146 def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), 4147 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4148 def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)), 4149 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4150 def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), 4151 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4152} 4153 4154defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">; 4155defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">; 4156defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">; 4157defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">; 4158defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">; 4159defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">; 4160defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">; 4161defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">; 4162 4163 4164 4165let Predicates = [HasFullFP16] in { 4166 def : Pat<(i32 (any_lround f16:$Rn)), 4167 (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>; 4168 def : Pat<(i64 (any_lround f16:$Rn)), 4169 (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; 4170 def : Pat<(i64 (any_llround f16:$Rn)), 4171 (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; 4172} 4173def : Pat<(i32 (any_lround f32:$Rn)), 4174 (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>; 4175def : Pat<(i32 (any_lround f64:$Rn)), 4176 (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>; 4177def : Pat<(i64 (any_lround f32:$Rn)), 4178 (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>; 4179def : Pat<(i64 (any_lround f64:$Rn)), 4180 (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>; 4181def : Pat<(i64 (any_llround f32:$Rn)), 4182 (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>; 4183def : Pat<(i64 (any_llround f64:$Rn)), 4184 
(!cast<Instruction>(FCVTASUXDr) f64:$Rn)>; 4185 4186//===----------------------------------------------------------------------===// 4187// Scaled integer to floating point conversion instructions. 4188//===----------------------------------------------------------------------===// 4189 4190defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; 4191defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; 4192 4193//===----------------------------------------------------------------------===// 4194// Unscaled integer to floating point conversion instruction. 4195//===----------------------------------------------------------------------===// 4196 4197defm FMOV : UnscaledConversion<"fmov">; 4198 4199// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable 4200let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { 4201def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, 4202 Sched<[WriteF]>, Requires<[HasFullFP16]>; 4203def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, 4204 Sched<[WriteF]>; 4205def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, 4206 Sched<[WriteF]>; 4207} 4208// Similarly add aliases 4209def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, 4210 Requires<[HasFullFP16]>; 4211def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; 4212def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; 4213 4214// Pattern for FP16 immediates 4215let Predicates = [HasFullFP16] in { 4216 def : Pat<(f16 fpimm:$in), 4217 (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; 4218} 4219 4220//===----------------------------------------------------------------------===// 4221// Floating point conversion instruction. 4222//===----------------------------------------------------------------------===// 4223 4224defm FCVT : FPConversion<"fcvt">; 4225 4226//===----------------------------------------------------------------------===// 4227// Floating point single operand instructions. 4228//===----------------------------------------------------------------------===// 4229 4230defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; 4231defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; 4232defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; 4233defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; 4234defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; 4235defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; 4236defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; 4237defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; 4238 4239defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; 4240defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; 4241 4242let SchedRW = [WriteFDiv] in { 4243defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; 4244} 4245 4246let Predicates = [HasFRInt3264] in { 4247 defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>; 4248 defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>; 4249 defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>; 4250 defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; 4251} // HasFRInt3264 4252 4253// Emitting strict_lrint as two instructions is valid as any exceptions that 4254// occur will happen in exactly one of the instructions (e.g. 
if the input is 4255// not an integer the inexact exception will happen in the FRINTX but not then 4256// in the FCVTZS as the output of FRINTX is an integer). 4257let Predicates = [HasFullFP16] in { 4258 def : Pat<(i32 (any_lrint f16:$Rn)), 4259 (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; 4260 def : Pat<(i64 (any_lrint f16:$Rn)), 4261 (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; 4262 def : Pat<(i64 (any_llrint f16:$Rn)), 4263 (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; 4264} 4265def : Pat<(i32 (any_lrint f32:$Rn)), 4266 (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; 4267def : Pat<(i32 (any_lrint f64:$Rn)), 4268 (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; 4269def : Pat<(i64 (any_lrint f32:$Rn)), 4270 (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; 4271def : Pat<(i64 (any_lrint f64:$Rn)), 4272 (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; 4273def : Pat<(i64 (any_llrint f32:$Rn)), 4274 (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; 4275def : Pat<(i64 (any_llrint f64:$Rn)), 4276 (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; 4277 4278//===----------------------------------------------------------------------===// 4279// Floating point two operand instructions. 4280//===----------------------------------------------------------------------===// 4281 4282defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; 4283let SchedRW = [WriteFDiv] in { 4284defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; 4285} 4286defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; 4287defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; 4288defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; 4289defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; 4290let SchedRW = [WriteFMul] in { 4291defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; 4292defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; 4293} 4294defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; 4295 4296// Match reassociated forms of FNMUL. 4297def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)), 4298 (FNMULHrr FPR16:$a, FPR16:$b)>, 4299 Requires<[HasFullFP16]>; 4300def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)), 4301 (FNMULSrr FPR32:$a, FPR32:$b)>; 4302def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)), 4303 (FNMULDrr FPR64:$a, FPR64:$b)>; 4304 4305def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4306 (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; 4307def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4308 (FMINDrr FPR64:$Rn, FPR64:$Rm)>; 4309def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4310 (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; 4311def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4312 (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; 4313 4314//===----------------------------------------------------------------------===// 4315// Floating point three operand instructions. 4316//===----------------------------------------------------------------------===// 4317 4318defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>; 4319defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", 4320 TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; 4321defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", 4322 TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >; 4323defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", 4324 TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; 4325 4326// The following def pats catch the case where the LHS of an FMA is negated. 
4327// The TriOpFrag above catches the case where the middle operand is negated. 4328 4329// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike 4330// the NEON variant. 4331 4332// Here we handle first -(a + b*c) for FNMADD: 4333 4334let Predicates = [HasNEON, HasFullFP16] in 4335def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)), 4336 (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; 4337 4338def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), 4339 (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; 4340 4341def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), 4342 (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; 4343 4344// Now it's time for "(-a) + (-b)*c" 4345 4346let Predicates = [HasNEON, HasFullFP16] in 4347def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))), 4348 (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; 4349 4350def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), 4351 (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; 4352 4353def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), 4354 (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; 4355 4356//===----------------------------------------------------------------------===// 4357// Floating point comparison instructions. 4358//===----------------------------------------------------------------------===// 4359 4360defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>; 4361defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>; 4362 4363//===----------------------------------------------------------------------===// 4364// Floating point conditional comparison instructions. 4365//===----------------------------------------------------------------------===// 4366 4367defm FCCMPE : FPCondComparison<1, "fccmpe">; 4368defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; 4369 4370//===----------------------------------------------------------------------===// 4371// Floating point conditional select instruction. 4372//===----------------------------------------------------------------------===// 4373 4374defm FCSEL : FPCondSelect<"fcsel">; 4375 4376let Predicates = [HasFullFP16] in 4377def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)), 4378 (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>; 4379 4380// CSEL instructions providing f128 types need to be handled by a 4381// pseudo-instruction since the eventual code will need to introduce basic 4382// blocks and control flow. 4383def F128CSEL : Pseudo<(outs FPR128:$Rd), 4384 (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), 4385 [(set (f128 FPR128:$Rd), 4386 (AArch64csel FPR128:$Rn, FPR128:$Rm, 4387 (i32 imm:$cond), NZCV))]> { 4388 let Uses = [NZCV]; 4389 let usesCustomInserter = 1; 4390 let hasNoSchedulingInfo = 1; 4391} 4392 4393//===----------------------------------------------------------------------===// 4394// Instructions used for emitting unwind opcodes on ARM64 Windows. 
4395//===----------------------------------------------------------------------===// 4396let isPseudo = 1 in { 4397 def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>; 4398 def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4399 def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4400 def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4401 def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4402 def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4403 def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4404 def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4405 def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4406 def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4407 def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4408 def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>; 4409 def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4410 def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>; 4411 def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4412 def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>; 4413 def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4414 def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>; 4415} 4416 4417// Pseudo instructions for Windows EH 4418//===----------------------------------------------------------------------===// 4419let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, 4420 isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in { 4421 def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; 4422 let usesCustomInserter = 1 in 4423 def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, 4424 Sched<[]>; 4425} 4426 4427// Pseudo instructions for homogeneous prolog/epilog 4428let isPseudo = 1 in { 4429 // Save CSRs in order, {FPOffset} 4430 def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4431 // Restore CSRs in order 4432 def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4433} 4434 4435//===----------------------------------------------------------------------===// 4436// Floating point immediate move. 4437//===----------------------------------------------------------------------===// 4438 4439let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 4440defm FMOV : FPMoveImmediate<"fmov">; 4441} 4442 4443//===----------------------------------------------------------------------===// 4444// Advanced SIMD two vector instructions. 4445//===----------------------------------------------------------------------===// 4446 4447defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", 4448 AArch64uabd>; 4449// Match UABDL in log2-shuffle patterns. 
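//
// For reference, a loop of the following shape (a hypothetical C sketch, not
// from this file) is what the abs(sub(zext, zext)) patterns below fold into a
// single UABDL, assuming the vectorizer widens both byte inputs to i16:
//
//   void absdiff(const unsigned char *a, const unsigned char *b, short *d) {
//     for (int i = 0; i < 8; i++)
//       d[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];  // -> uabdl.8h
//   }
//
// The xor/add variants below match the same computation after DAG combines
// rewrite abs(x) as ((x + sign) xor sign) with sign = x >>s 15.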
4450def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), 4451 (zext (v8i8 V64:$opB))))), 4452 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4453def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4454 (v8i16 (add (sub (zext (v8i8 V64:$opA)), 4455 (zext (v8i8 V64:$opB))), 4456 (AArch64vashr v8i16:$src, (i32 15))))), 4457 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4458def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4459 (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), 4460 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4461def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4462 (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4463 (zext (extract_high_v16i8 (v16i8 V128:$opB)))), 4464 (AArch64vashr v8i16:$src, (i32 15))))), 4465 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4466def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), 4467 (zext (v4i16 V64:$opB))))), 4468 (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; 4469def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), 4470 (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), 4471 (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; 4472def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), 4473 (zext (v2i32 V64:$opB))))), 4474 (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; 4475def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), 4476 (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), 4477 (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; 4478 4479defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; 4480defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; 4481defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; 4482defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; 4483defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; 4484defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; 4485defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; 4486defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; 4487defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; 4488defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>; 4489 4490def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), 4491 (CMLTv8i8rz V64:$Rn)>; 4492def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), 4493 (CMLTv4i16rz V64:$Rn)>; 4494def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), 4495 (CMLTv2i32rz V64:$Rn)>; 4496def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), 4497 (CMLTv16i8rz V128:$Rn)>; 4498def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), 4499 (CMLTv8i16rz V128:$Rn)>; 4500def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), 4501 (CMLTv4i32rz V128:$Rn)>; 4502def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), 4503 (CMLTv2i64rz V128:$Rn)>; 4504 4505defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 4506defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 4507defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 4508defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 4509defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 4510defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; 4511defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; 4512defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; 4513def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), 4514 (FCVTLv4i16 V64:$Rn)>; 4515def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp 
(extract_subvector (v8i16 V128:$Rn), 4516 (i64 4)))), 4517 (FCVTLv8i16 V128:$Rn)>; 4518def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; 4519 4520def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; 4521 4522defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; 4523defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; 4524defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; 4525defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; 4526defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; 4527def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), 4528 (FCVTNv4i16 V128:$Rn)>; 4529def : Pat<(concat_vectors V64:$Rd, 4530 (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), 4531 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4532def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; 4533def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; 4534def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), 4535 (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4536defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; 4537defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; 4538defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", 4539 int_aarch64_neon_fcvtxn>; 4540defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; 4541defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; 4542 4543// AArch64's FCVT instructions saturate when out of range. 4544multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> { 4545 let Predicates = [HasFullFP16] in { 4546 def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)), 4547 (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>; 4548 def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)), 4549 (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>; 4550 } 4551 def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), 4552 (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>; 4553 def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)), 4554 (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>; 4555 def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)), 4556 (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>; 4557} 4558defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">; 4559defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">; 4560 4561def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; 4562def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; 4563def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; 4564def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; 4565def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; 4566 4567def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; 4568def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; 4569def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; 4570def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; 4571def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; 4572 4573defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; 4574defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; 4575defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", 
any_fround>; 4576defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; 4577defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; 4578defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; 4579defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; 4580defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; 4581defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; 4582 4583let Predicates = [HasFRInt3264] in { 4584 defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; 4585 defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>; 4586 defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>; 4587 defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>; 4588} // HasFRInt3264 4589 4590defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; 4591defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; 4592defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", 4593 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 4594defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; 4595// Aliases for MVN -> NOT. 4596def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", 4597 (NOTv8i8 V64:$Vd, V64:$Vn)>; 4598def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", 4599 (NOTv16i8 V128:$Vd, V128:$Vn)>; 4600 4601def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4602def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4603def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4604def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4605def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4606def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4607 4608defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>; 4609defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; 4610defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; 4611defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; 4612defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", 4613 BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >; 4614defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>; 4615defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; 4616defm SHLL : SIMDVectorLShiftLongBySizeBHS; 4617defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; 4618defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 4619defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; 4620defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; 4621defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; 4622defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", 4623 BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; 4624defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; 4625defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; 4626defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; 4627defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; 4628defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; 4629defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; 4630defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; 4631 4632def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; 4633def : Pat<(v4f16 
(AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of the
// definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;

// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN
// with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
// with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127)) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), 4706 (v8i16 VImm80)))))), 4707 (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4708 4709// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn) 4710// with reversed min/max 4711def : Pat<(v8i16 (concat_vectors 4712 (v4i16 V64:$Vd), 4713 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), 4714 (v4i32 VImm7FFF)))))), 4715 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4716def : Pat<(v8i16 (concat_vectors 4717 (v4i16 V64:$Vd), 4718 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), 4719 (v4i32 VImm8000)))))), 4720 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4721 4722//===----------------------------------------------------------------------===// 4723// Advanced SIMD three vector instructions. 4724//===----------------------------------------------------------------------===// 4725 4726defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; 4727defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>; 4728defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; 4729defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; 4730defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; 4731defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; 4732defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; 4733defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; 4734foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in { 4735def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>; 4736} 4737defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; 4738let Predicates = [HasNEON] in { 4739foreach VT = [ v2f32, v4f32, v2f64 ] in 4740def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; 4741} 4742let Predicates = [HasNEON, HasFullFP16] in { 4743foreach VT = [ v4f16, v8f16 ] in 4744def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; 4745} 4746defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; 4747defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; 4748defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>; 4749defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; 4750defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; 4751defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; 4752defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; 4753defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; 4754defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; 4755defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; 4756defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; 4757defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; 4758defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; 4759defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; 4760defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; 4761defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; 4762 4763// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the 4764// instruction expects the addend first, while the fma intrinsic puts it last. 
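//
// Concretely (a hypothetical C sketch with ACLE intrinsics, not part of this
// file): the intrinsic, like the instruction, takes the addend first, whereas
// the llvm.fma node produced for it takes the addend last, hence the swapped
// TriOpFrag operands below:
//
//   #include <arm_neon.h>
//   float32x4_t madd(float32x4_t acc, float32x4_t n, float32x4_t m) {
//     return vfmaq_f32(acc, n, m);   // acc + n*m -> fmla v0.4s, v1.4s, v2.4s
//   }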
defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
       TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
       TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;

// MLA and MLS are generated in MachineCombine
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturate patterns, other than the intrinsics matched above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}

def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                       int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDFPTwoScalarCVT<0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Xext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with UCVTF on
// floating point registers, since source and destination must have the
// same size.

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double.
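// Illustrative sketch (pointer and registers are arbitrary examples, not
// mandated by these patterns): for
//   %b = load i8, ptr %p
//   %f = uitofp i8 %b to double
// the zero-extending byte load is done straight into an FP register, so the
// sequence can select to
//   ldr   b0, [x0]
//   ucvtf d0, d0
// with no GPR <-> FPR transfer.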
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
} // let Predicates = [HasNEON]

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL  : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                         AArch64sabd>;
defm SADDL  : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW  : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
            BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL  : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL  : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL  : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL  : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW  : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL  : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                             AArch64uabd>;
defm UADDL  : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW  : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL  : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL  : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL  : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL  : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW  : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;

// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
                                       Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                    V64:$Ra,
                    (v4i16 (extract_subvector
                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v8i16 (INST8B
                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                    V64:$Ra,
                    (v2i32 (extract_subvector
                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v4i32 (INST4H
                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                    V64:$Ra,
                    (v1i64 (extract_subvector
                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v2i64 (INST2S
                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
}

defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;

// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.

// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;

def : Pat<(v16i8 (concat_vectors
                   (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
                   (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                   (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
                   (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                   (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
                   (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
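// Illustrative sketch (registers arbitrary): truncating two v4i32 values and
// concatenating the v4i16 halves, e.g.
//   %lo = trunc <4 x i32> %a to <4 x i16>
//   %hi = trunc <4 x i32> %b to <4 x i16>
// followed by a concat, needs no xtn/xtn2 pair: taking the even 16-bit lanes
//   uzp1 v0.8h, v1.8h, v2.8h
// performs both truncations in a single instruction.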
//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                  (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;


//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------

defm DUP : SIMDScalarDUP<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;

// Only the lower half of the result of the inner FADDP is used in the patterns
// below, so the second operand does not matter. Re-use the first input
// operand, so no additional dependencies need to be introduced.
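// Illustrative sketch (registers arbitrary; this applies to the reassociating
// form of the reduction): f32 @llvm.vector.reduce.fadd on a v4f32 value can
// lower to
//   faddp v0.4s, v0.4s, v0.4s   // lanes 0 and 1 now hold the partial sums
//   faddp s0, v0.2s             // add them for the final scalar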
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
          (FADDPv2i16p
            (EXTRACT_SUBREG
               (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
             dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, V128:$Rn),
             dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
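// Illustrative sketch (registers arbitrary): broadcasting the low byte of
// 16-bit lane 3 of a v8i16 value does not need a separate truncate; the low
// byte of h-lane 3 occupies b-lane 2*3 = 6 of the same register, so the
// VecIndex_x2 transform below remaps the index and the whole thing selects to
//   dup v0.8b, v1.b[6]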
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                        imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                        imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                        VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                        VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                       VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                       VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
                         (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
                         (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
                         (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
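// Illustrative sketch (registers arbitrary): copying s-lane 3 of one vector
// into s-lane 1 of another through int_aarch64_neon_vcopy_lane selects to a
// single lane-to-lane insert,
//   mov v0.s[1], v1.s[3]   // preferred alias of ins v0.s[1], v1.s[3]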
5988// FIXME refactor to a shared class/dev parameterized on vector type, vector 5989// index type and INS extension 5990def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane 5991 (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), 5992 VectorIndexB:$idx2)), 5993 (v16i8 (INSvi8lane 5994 V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) 5995 )>; 5996def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane 5997 (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), 5998 VectorIndexH:$idx2)), 5999 (v8i16 (INSvi16lane 6000 V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) 6001 )>; 6002def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane 6003 (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), 6004 VectorIndexS:$idx2)), 6005 (v4i32 (INSvi32lane 6006 V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) 6007 )>; 6008def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane 6009 (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), 6010 VectorIndexD:$idx2)), 6011 (v2i64 (INSvi64lane 6012 V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) 6013 )>; 6014 6015multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, 6016 ValueType VTScal, Instruction INS> { 6017 def : Pat<(VT128 (vector_insert V128:$src, 6018 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 6019 imm:$Immd)), 6020 (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; 6021 6022 def : Pat<(VT128 (vector_insert V128:$src, 6023 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 6024 imm:$Immd)), 6025 (INS V128:$src, imm:$Immd, 6026 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; 6027 6028 def : Pat<(VT64 (vector_insert V64:$src, 6029 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 6030 imm:$Immd)), 6031 (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), 6032 imm:$Immd, V128:$Rn, imm:$Immn), 6033 dsub)>; 6034 6035 def : Pat<(VT64 (vector_insert V64:$src, 6036 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 6037 imm:$Immd)), 6038 (EXTRACT_SUBREG 6039 (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, 6040 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), 6041 dsub)>; 6042} 6043 6044defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>; 6045defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>; 6046defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; 6047defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; 6048 6049// Insert from bitcast 6050// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0) 6051def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)), 6052 (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>; 6053def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)), 6054 (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>; 6055 6056// bitcast of an extract 6057// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane)) 6058def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), 6059 (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>; 6060def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))), 6061 (EXTRACT_SUBREG V128:$src, ssub)>; 6062def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))), 6063 (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>; 6064def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))), 6065 (EXTRACT_SUBREG 

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if the lane number is
// anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might just as
// well be INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;
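
// Illustrative sketch: the canonical source-level form of the concat
// handled by ConcatPat above (expected selection shown; subtargets may
// differ).
//
//   #include <arm_neon.h>
//   int32x4_t concat(int32x2_t lo, int32x2_t hi) {
//     return vcombine_s32(lo, hi);  // expected: mov v0.d[1], v1.d[0]
//   }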

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;

multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
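
// Illustrative sketch of the addv(addlp(x)) ==> addlv folding above,
// written with ACLE intrinsics (the expected selection is a sketch):
//
//   #include <arm_neon.h>
//   uint16_t sum_bytes(uint8x16_t v) {
//     return vaddvq_u16(vpaddlq_u8(v));  // expected: uaddlv h0, v0.16b
//   }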

// Patterns for across-vector intrinsics that have a node equivalent which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the
// vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
                                 (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
                                 (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;

}
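
// Illustrative sketch: a plain across-lanes reduction of the kind the
// explicit patterns above cover once the vector_extract is consumed.
//
//   #include <arm_neon.h>
//   uint32_t sum(uint32x4_t v) {
//     return vaddvq_u32(v);  // expected: addv s0, v0.4s
//   }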

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
                    maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
                    maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            ssub))>;
}

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;
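
// Illustrative sketch of the special-cased two-lane reduction, which uses a
// pairwise ADDP with both operands tied to the same register:
//
//   #include <arm_neon.h>
//   uint32_t sum2(uint32x2_t v) {
//     return vaddv_u32(v);  // expected: addp v0.2s, v0.2s, v0.2s
//   }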

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              (i64 0)))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              (i64 0)))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
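
// Illustrative sketch of the vaddlv_s32 -> SADDLP mapping above (the result
// is then moved out of the d register):
//
//   #include <arm_neon.h>
//   int64_t widen_sum(int32x2_t v) {
//     return vaddlv_s32(v);  // expected: saddlp v0.1d, v0.2s; fmov x0, d0
//   }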

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".2d",
                     [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
                                                "fmov", ".2s",
                     [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".4s",
                     [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
                                                "fmov", ".4h",
                     [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                                "fmov", ".8h",
                     [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                                         [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                              simdimmtype10,
                                              "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
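
// Illustrative sketch: materializing all-ones through the MOVIv2d_ns
// patterns above (all-zeros is the same with #0):
//
//   #include <arm_neon.h>
//   uint32x4_t ones(void) {
//     return vdupq_n_u32(~0u);  // expected: movi v0.2d, #0xffffffffffffffff
//   }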

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

let Predicates = [HasNEON] in {
  // Using the MOVI to materialize fp constants.
  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
                                       (i32 24)),
                            ssub)>;
}

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                    [(set (v2i32 V64:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                    [(set (v4i32 V128:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
                                              "movi", ".8b",
                   [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                               "movi", ".16b",
                    [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
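
// Illustrative sketch: a constant whose bitwise complement fits the
// shifted-imm8 encoding is materialized through the MVNI patterns above:
//
//   #include <arm_neon.h>
//   uint32x4_t c(void) {
//     return vdupq_n_u32(0xffff00ffu);  // expected: mvni v0.4s, #0xff, lsl #8
//   }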

// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                    [(set (v2i32 V64:$Rd),
                          (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                    [(set (v4i32 V128:$Rd),
                          (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let hasSideEffects = 0 in {
  defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)>>;
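
// Illustrative sketch of an indexed fused multiply-add that the FMLA
// patterns above select (the expected output is a sketch):
//
//   #include <arm_neon.h>
//   float32x4_t fma_lane(float32x4_t acc, float32x4_t a, float32x4_t b) {
//     return vfmaq_laneq_f32(acc, a, b, 1);  // expected: fmla v0.4s, v1.4s, v2.s[1]
//   }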

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                    (v4f32 (insert_subvector undef,
                                               (v2f32 (fneg V64:$Rm)),
                                               (i64 0))),
                                    VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                    (v4f32 (insert_subvector undef,
                                               (v2f32 (fneg V64:$Rm)),
                                               (i64 0))),
                                    VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                             VectorIndexD:$idx))),
            (FMLSv2i64_indexed
              V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;

def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH  : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH  : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                      int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;
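
// Illustrative sketch: multiplying by a broadcast scalar goes through the
// dup patterns above and becomes an indexed FMUL:
//
//   #include <arm_neon.h>
//   float32x4_t scale(float32x4_t v, float s) {
//     return vmulq_n_f32(v, s);  // expected: fmul v0.4s, v0.4s, v1.s[0]
//   }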

// Generated by the MachineCombiner.
defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                            (vector_extract (v4i32 V128:$Vm),
                                                            VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
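
// Illustrative sketch of an indexed widening multiply-accumulate covered
// by the SMLAL patterns above:
//
//   #include <arm_neon.h>
//   int32x4_t mla(int32x4_t acc, int16x4_t a, int16x4_t b) {
//     return vmlal_lane_s16(acc, a, b, 3);  // expected: smlal v0.4s, v1.4h, v2.h[3]
//   }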

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;

// Patterns for the FP16 intrinsics: these need a register copy to/from the h
// sub-register, since i16 is not a legal type.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
                 (and FPR32:$Rn, (i32 65535)),
                 vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
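
// Illustrative sketch of a fixed-point conversion selecting the
// immediate-shift FCVTZS form above (result then moved to a GPR):
//
//   #include <arm_neon.h>
//   int32_t to_fixed(float x) {
//     return vcvts_n_s32_f32(x, 16);  // expected: fcvtzs s0, s0, #16
//   }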

def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGE16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGT16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;

defm SHL      : SIMDScalarLShiftD<0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS<0, 0b10011, "sqrshrn",
                                    int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS<1, 0b10001, "sqrshrun",
                                    int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS<0, 0b10010, "sqshrn",
                                    int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS<1, 0b10000, "sqshrun",
                                    int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<0, 0b00110, "srsra",
                    TriOpFrag<(add node:$LHS,
                                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<0, 0b00010, "ssra",
                    TriOpFrag<(add_and_or_is_add node:$LHS,
                                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS<1, 0b10011, "uqrshrn",
                                    int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS<1, 0b10010, "uqshrn",
                                    int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<1, 0b00110, "ursra",
                    TriOpFrag<(add node:$LHS,
                                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<1, 0b00010, "usra",
                    TriOpFrag<(add_and_or_is_add node:$LHS,
                                   (AArch64vlshr node:$MHS, node:$RHS))>>;
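
// Illustrative sketch: an accumulating scalar shift, matched by the SSRA
// TriOpFrag pattern above:
//
//   #include <arm_neon.h>
//   int64x1_t acc_shift(int64x1_t acc, int64x1_t x) {
//     return vsra_n_s64(acc, x, 3);  // expected: ssra d0, d1, #3
//   }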

//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU : SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF  : SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                   int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
                BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
defm SHL    : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
defm SHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI    : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                          int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                          int_aarch64_neon_sqrshrun>;
defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                         int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                         int_aarch64_neon_sqshrun>;
defm SRI    : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR  : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA  : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
                TriOpFrag<(add node:$LHS,
                               (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHLL  : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR   : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA   : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF  : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                                   int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                         int_aarch64_neon_uqrshrn>;
defm UQSHL  : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                        int_aarch64_neon_uqshrn>;
defm URSHR  : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA  : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                TriOpFrag<(add node:$LHS,
                               (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHLL  : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR   : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA   : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))>>;

// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;

// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
                   (v8i8 V64:$Vd),
                   (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
          (RADDHNv8i16_v16i8
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
            (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                   (v4i16 V64:$Vd),
                   (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
          (RADDHNv4i32_v8i16
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
            (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
                   (v2i32 V64:$Vd),
                   (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
          (RADDHNv2i64_v4i32
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
            (v2i64 (MOVIv2d_ns (i32 0))))>;
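
// Illustrative sketch: a rounding shift-narrow by half the element size,
// which the RADDHN patterns above can select as raddhn with a zero operand:
//
//   #include <arm_neon.h>
//   uint8x8_t round_narrow(uint16x8_t v) {
//     return vrshrn_n_u16(v, 8);  // expected: raddhn v0.8b, v0.8h, <zero>.8h
//   }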

// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result, so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
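
// Illustrative sketch of a zero extension selected through the USHLL #0
// patterns above (printed as the uxtl alias):
//
//   #include <arm_neon.h>
//   uint16x8_t widen(uint8x8_t v) {
//     return vmovl_u8(v);  // expected: ushll v0.8h, v0.8b, #0 (uxtl)
//   }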

// Also match an extend from the upper half of a 128-bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;

// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
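
// Illustrative sketch of an upper-half extension matched by the
// extract_subvector patterns above (printed as the sxtl2 alias):
//
//   #include <arm_neon.h>
//   int32x4_t widen_high(int16x8_t v) {
//     return vmovl_high_s16(v);  // expected: sshll2 v0.4s, v0.8h, #0 (sxtl2)
//   }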

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bits -> float. 2 size step-ups.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                           (SSHLLv4i16_shift
                             (f64
                               (EXTRACT_SUBREG
                                 (SSHLLv8i8_shift
                                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                  INST,
                                                  bsub),
                                   0),
                                 dsub)),
                             0),
                           ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                           (SSHLLv4i16_shift
                             (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                            INST,
                                            hsub),
                             0),
                           ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bits to 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with SCVTF on
// floating point registers (source and destination must have the same size).
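
// Illustrative sketch: when the predicates above hold, a byte load feeding
// a float conversion takes the FPR-only sequence instead of a GPR SCVTF:
//
//   #include <stdint.h>
//   float from_byte(const int8_t *p) {
//     return (float)*p;
//     // expected: ldr b0, [x0]; sshll v0.8h, v0.8b, #0;
//     //           sshll v0.4s, v0.4h, #0; scvtf s0, s0
//   }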

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double. 3 size step-ups: give up.
// 16-bits -> double. 2 size step-ups.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat<(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                           (SSHLLv2i32_shift
                             (f64
                               (EXTRACT_SUBREG
                                 (SSHLLv4i16_shift
                                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                  INST,
                                                  hsub),
                                   0),
                                 dsub)),
                             0),
                           dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
  : Pat<(f64 (sint_to_fp (i32 (load addrmode)))),
        (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                           (SSHLLv2i32_shift
                             (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                            INST,
                                            ssub),
                             0),
                           dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;

// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.

//----------------------------------------------------------------------------
// AdvSIMD Load-Store Structure
//----------------------------------------------------------------------------
defm LD1 : SIMDLd1Multiple<"ld1">;
defm LD2 : SIMDLd2Multiple<"ld2">;
defm LD3 : SIMDLd3Multiple<"ld3">;
defm LD4 : SIMDLd4Multiple<"ld4">;

defm ST1 : SIMDSt1Multiple<"st1">;
defm ST2 : SIMDSt2Multiple<"st2">;
defm ST3 : SIMDSt3Multiple<"st3">;
defm ST4 : SIMDSt4Multiple<"st4">;

class Ld1Pat<ValueType ty, Instruction INST>
  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;

def : Ld1Pat<v16i8, LD1Onev16b>;
def : Ld1Pat<v8i16, LD1Onev8h>;
def : Ld1Pat<v4i32, LD1Onev4s>;
def : Ld1Pat<v2i64, LD1Onev2d>;
def : Ld1Pat<v8i8,  LD1Onev8b>;
def : Ld1Pat<v4i16, LD1Onev4h>;
def : Ld1Pat<v2i32, LD1Onev2s>;
def : Ld1Pat<v1i64, LD1Onev1d>;

class St1Pat<ValueType ty, Instruction INST>
  : Pat<(store ty:$Vt, GPR64sp:$Rn),
        (INST ty:$Vt, GPR64sp:$Rn)>;

def : St1Pat<v16i8, ST1Onev16b>;
def : St1Pat<v8i16, ST1Onev8h>;
def : St1Pat<v4i32, ST1Onev4s>;
def : St1Pat<v2i64, ST1Onev2d>;
def : St1Pat<v8i8,  ST1Onev8b>;
def : St1Pat<v4i16, ST1Onev4h>;
def : St1Pat<v2i32, ST1Onev2s>;
def : St1Pat<v1i64, ST1Onev1d>;

//---
// Single-element
//---

defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
}

def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
// Grab the floating point version too.
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
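
// Illustrative sketch: a load-and-splat that the AArch64dup load patterns
// above select to LD1R:
//
//   #include <arm_neon.h>
//   uint32x4_t splat_load(const uint32_t *p) {
//     return vld1q_dup_u32(p);  // expected: ld1r { v0.4s }, [x0]
//   }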

class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;

def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;

// Generate LD1 for extload if memory type does not match the
// destination type, for example:
//
//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
//
// In this case, the index must be adjusted to match the LD1 type.
//
class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
                         VecIndex, ValueType VTy, ValueType STy,
                         Instruction LD1, SDNodeXForm IdxOp>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;

def VectorIndexStoH : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;
def VectorIndexStoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
}]>;
def VectorIndexHtoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;

def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
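
// Illustrative sketch: a load into one lane, covered by Ld1Lane128Pat above:
//
//   #include <arm_neon.h>
//   uint32x4_t load_lane(uint32x4_t v, const uint32_t *p) {
//     return vld1q_lane_u32(p, v, 2);  // expected: ld1 { v0.s }[2], [x0]
//   }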
// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
let Predicates = [NotInStreamingSVEMode] in {
  class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
                           SDPatternOperator ExtLoad, Instruction LD1>
    : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
          (ResultTy (EXTRACT_SUBREG
                     (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;

  def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
  def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
  def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
}

class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                 VecIndex:$idx, GPR64sp:$Rn),
            dsub)>;

def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;


defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;

// Stores
defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;

let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;

let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
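// For illustration (assumed registers): the St1Lane* patterns above fold a
// lane extract feeding a scalar store into a single lane store, e.g. storing
// lane 1 of a v4i32 should select
//   st1 { v0.s }[1], [x0]
// instead of a separate lane move followed by a scalar store.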
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                            ValueType VTy, ValueType STy, Instruction ST1,
                            int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
            (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
                 VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
            (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
                 VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
                        2>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;

multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                             ValueType VTy, ValueType STy, Instruction ST1,
                             int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
            (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
            (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
                         1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
                         2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;

let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
}

defm ST1 : SIMDLdSt1SingleAliases<"st1">;
defm ST2 : SIMDLdSt2SingleAliases<"st2">;
defm ST3 : SIMDLdSt3SingleAliases<"st3">;
defm ST4 : SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

let Predicates = [HasAES] in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied : Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                  Sched<[WriteVq]>;
def AESIMCrrTied : Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                   Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired
// with AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",     int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",     int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",     int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0",   int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h",   int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",  int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",     int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",   int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
}

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
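// For illustration: the zext pattern above typically materializes as a plain
// 32-bit register move, e.g.
//   mov w0, w0
// since writing a W register implicitly zeroes the top 32 bits; the
// SUBREG_TO_REG wrapper just records that the high half is already zero.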
// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
def : Pat<(i64 (sext GPR32:$src)),
   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;

def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;

def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;

def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of
// the original value which is to be sign extended; i.e., we support shifts of
// up to bitwidth-1 bits.
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;

def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;

def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20

// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
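// For illustration: with the pattern above, an i64 -> i32 truncate is just a
// sub_32 subregister copy, so it usually costs no instruction at all once the
// register allocator has coalesced the copy.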

// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;

// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2, then shuffle the high part of
// both results together.
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//   store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//   store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
//   v2 = REV v2i32 v1
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16 v3
//   v5 = REV v4i16 v4               (implicit)
//   store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
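// For illustration (assumed register): the 64-bit-half swap that a "REV128"
// would perform is synthesized below by EXT with the same source twice, e.g.
//   ext v0.16b, v0.16b, v0.16b, #8
// which is exactly what the (EXTv16i8 $src, $src, (i32 8)) output patterns in
// the f128 bitconverts encode.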
//
// Most bitconverts require some sort of conversion. The only exceptions are:
//   a) Identity conversions - vNfX <-> vNiX
//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//

// Natural vector casts (64 bit)
foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
  foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
              (VT FPR64:$src)>;

// Natural vector casts (128 bit)
foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
  foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
              (VT FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
          (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
          (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
          (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
          (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
          (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
          (v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;

def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
          (v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
          (v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8  FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
          (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
          (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
          (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                          (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
          (v2f64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
          (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
          (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
          (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
          (v2i64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
          (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
          (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
          (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
          (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
          (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;

def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
          (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
          (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;

def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
          (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                            (REV64v8i16 FPR128:$src),
                            (i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
          (v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
          (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
          (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
}

def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
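// For illustration (assumed registers): with the patterns above, a horizontal
// add such as
//   (f64 (any_fadd (vector_extract v2f64:$v, 0), (vector_extract v2f64:$v, 1)))
// should select a single pairwise add:
//   faddp d0, v0.2d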
// vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity = 15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call limited to only use registers (x16 and x17) which are
  // allowed to tail-call a "BTI c" instruction.
  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy
// propagation, to reason about, so is preferred when it's possible to use it.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
              (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
            (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
             (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                  (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;

// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;

// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;
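// For illustration (a hypothetical source loop): idot_v8i8 above matches
// roughly what the vectorizer produces for
//   unsigned r = 0;
//   for (int i = 0; i < 8; ++i) r += a[i] * b[i];   // a, b: unsigned char*
// and odot_v8i8 rewrites it as a dot product of the two 8-byte vectors, a
// pairwise add of the two 32-bit accumulators, and a move to a GPR.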
class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
               SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;

class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;

class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;

class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}
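// Note: the odot_* output fragments above seed the dot-product accumulator
// with zero (a DUP of WZR) because SDOT/UDOT accumulate into their
// destination register, so the final reduction must start from zero.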
// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}

let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:  Store64BV<0b011, "st64bv">;
  def ST64BV0: Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}

let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;

    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;

    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;

    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
  }
}
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}

// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;

// MOPS operations always consist of three 4-byte instructions (a prologue, a
// main and an epilogue instruction), hence Size = 12 below.
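//
// For illustration (assembly sketch; registers are hypothetical): a memset
// selected through MOPSMemorySetPseudo expands to the prologue/main/epilogue
// triple
//
//   setp [x0]!, x1!, x2
//   setm [x0]!, x1!, x2
//   sete [x0]!, x1!, x2
//
// with x0 the destination, x1 the size and x2 the value. SETP defines NZCV
// and SETM/SETE consume it (the Defs/Uses above), and the address/size
// writebacks are why the pseudos below tie $Rd/$Rn to $Rd_wb/$Rn_wb.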
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                     (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                     [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
    : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
             []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb", (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb", (HINT 22), 1>;
}

//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW : ReadCheckWriteCompareAndSwap;

defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;

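// For illustration (registers hypothetical): with +cssc these become single
// instructions instead of the usual expansions, e.g.
//
//   abs  x0, x1        // previously cmp + cneg
//   cnt  w0, w1        // scalar popcount, no round-trip through a SIMD reg
//   smax x0, x1, x2    // previously cmp + csel
//
// The min/max operations also come in immediate forms.
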
def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (register); when the decoder method of PRFM
  // returns Fail, the decoder should attempt to decode RPRFM. This requires
  // setting the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                             size  opc   opc2
  def STILPWpre:  BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:  BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:    BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:    BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  //                                        size  opc
  def STLRWpre:  BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:  BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
  def LDAPRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}

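// Usage sketch (standard FEAT_LRCPC3 assembly; registers hypothetical):
//
//   stilp  w1, w2, [x0]      // store-release ordered pair
//   ldiapp w1, w2, [x0]      // load-acquire RCpc ordered pair
//   stlr   w1, [x0, #-4]!    // store-release with pre-index writeback
//
// The pre/post-indexed variants model the writeback by tying $Rn to the
// $wback output above.
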
;">
let Predicates = [HasRCPC3, HasNEON] in {
  //                                              size  opc   regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8,   (outs), (ins FPR8:$Rt,   GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16,  (outs), (ins FPR16:$Rt,  GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32,  (outs), (ins FPR32:$Rt,  GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64,  (outs), (ins FPR64:$Rt,  GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8,   (outs FPR8:$Rt),   (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16,  (outs FPR16:$Rt),  (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32,  (outs FPR32:$Rt),  (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64,  (outs FPR64:$Rt),  (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  //                              L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // A custom decoder is needed here because TableGen sees this instruction
    // as having four operand fields and autogenerates a decoder
    // (decodeToMCInst) that builds an MC representation with four operands,
    // whereas the printer (AArch64InstPrinter::printInstruction in
    // AArch64GenAsmWriter.inc, which appears to be derived from the asm
    // template) expects five, the extra one being the XZR pair. Adding a
    // bits<5> xzr_pair field would be an alternative, but with no way to
    // constrain it to 0b11111 here, the encoding would overlap with the main
    // SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

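;">
// Usage sketch (operand values hypothetical): SYSP takes a consecutive
// register pair (even-numbered register first) as payload, or XZR when none
// is needed, in which case the alias above allows the pair to be omitted:
//
//   sysp #0, c2, c0, #0, x0, x1   // pair in x0, x1
//   sysp #0, c2, c0, #0           // implicit xzr, xzr
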
//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS     1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR     1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc).

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}


include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"