//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// i32 immediate operand printed through printVMOVModImmOperand.  It has no
// assembler match class of its own.
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

// Immediate operands, each paired with its own assembler operand class.
def nImmSplatI8AsmOperand : AsmOperandClass {
  let Name = "NEONi8splat";
}
def nImmSplatI8 : Operand<i32> {
  let ParserMatchClass = nImmSplatI8AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}
def nImmSplatI16AsmOperand : AsmOperandClass {
  let Name = "NEONi16splat";
}
def nImmSplatI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatI16AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}
def nImmSplatI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32splat";
}
def nImmSplatI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatI32AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}

// The "Not" variants only set a parser match class; no PrintMethod is given.
def nImmSplatNotI16AsmOperand : AsmOperandClass {
  let Name = "NEONi16splatNot";
}
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32splatNot";
}
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}

def nImmVMOVI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32vmov";
}
def nImmVMOVI32 : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}

// Assembler operand classes parameterized over a (From, To) value-type pair.
// The class name, predicate method and render method are all assembled from
// From.Size and To.Size.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
}

// Operands using the parameterized match classes above.
class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
  let PrintMethod = "printVMOVModImmOperand";
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
  let PrintMethod = "printVMOVModImmOperand";
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass {
  let Name = "NEONi32vmovNeg";
}
def nImmVMOVI32Neg : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}
// Printed with printFPImmOperand and matched with FPImmOperand (defined
// outside this section).
def nImmVMOVF32 : Operand<i32> {
  let ParserMatchClass = FPImmOperand;
  let PrintMethod = "printFPImmOperand";
}
def nImmSplatI64AsmOperand : AsmOperandClass {
  let Name = "NEONi64splat";
}
def nImmSplatI64 : Operand<i32> {
  let ParserMatchClass = nImmSplatI64AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}

// Vector lane index operands.  Each ImmLeaf predicate accepts an immediate
// that, viewed as uint64_t, is strictly below the given bound (8, 4, 2, 1);
// the unsigned cast also rejects negative values.
def VectorIndex8Operand  : AsmOperandClass {
  let Name = "VectorIndex8";
}
def VectorIndex16Operand : AsmOperandClass {
  let Name = "VectorIndex16";
}
def VectorIndex32Operand : AsmOperandClass {
  let Name = "VectorIndex32";
}
def VectorIndex64Operand : AsmOperandClass {
  let Name = "VectorIndex64";
}
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex8Operand;
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex16Operand;
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex32Operand;
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex64Operand;
}

// Vector register-list operands.  Every list operand class is parsed by
// parseVectorList and rendered by addVecListOperands.

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
// NOTE(review): this operand is declared over DPair, while
// VecListDPairSpacedAllLanes is declared over DPairSpc - confirm that the
// register class here is intended.
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairAllLanes
  : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairSpacedAllLanes
  : RegisterOperand<DPairSpc, "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDAllLanes
  : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs),
// with "all lanes" subscripting.
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQAllLanes
  : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDAllLanes
  : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs),
// with "all lanes" subscripting.
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQAllLanes
  : RegisterOperand<DPR, "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed register lists.  Each operand class is rendered by
// addVecListIndexedOperands, and each operand carries two machine operands:
// a DPR register ($Vd) and an i32 immediate ($idx).

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
}

// Load/store pattern fragments filtered on the memory node's getAlign().
//   dword_*    : alignment >= 8
//   word_*     : alignment == 4
//   hword_*    : alignment == 2
//   byte_*     : alignment == 1
//   non_word_* : alignment <  4
// Only the dword and non_word forms accept a range; the others require an
// exact match.
def dword_alignedload
  : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlign() >= 8;
}]>;
def dword_alignedstore
  : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlign() >= 8;
}]>;
def word_alignedload
  : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlign() == 4;
}]>;
def word_alignedstore
  : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlign() == 4;
}]>;
def hword_alignedload
  : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlign() == 2;
}]>;
def hword_alignedstore
  : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlign() == 2;
}]>;
def byte_alignedload
  : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlign() == 1;
}]>;
def byte_alignedstore
  : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlign() == 1;
}]>;
def non_word_alignedload
  : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlign() < 4;
}]>;
def non_word_alignedstore
  : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlign() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// VTST: one integer result, two operands of the same type as each other.
def SDTARMVTST
  : SDTypeProfile<1, 2, [SDTCisInt<0>,
                         SDTCisSameAs<1, 2>]>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSHXIMM
  : SDTypeProfile<1, 2, [SDTCisInt<0>,
                         SDTCisInt<1>,
                         SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM
  : SDTypeProfile<1, 3, [SDTCisInt<0>,
                         SDTCisSameAs<0, 1>,
                         SDTCisSameAs<0, 2>,
                         SDTCisVT<3, i32>]>;

// Shift-by-immediate nodes.  SDTARMVSHIMM is not defined in this section.
def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

// VBSP: vector result with three operands, all of the result type.
def NEONvbsp
  : SDNode<"ARMISD::VBSP",
           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                SDTCisSameAs<0, 1>,
                                SDTCisSameAs<0, 2>,
                                SDTCisSameAs<0, 3>]>>;

// VEXT: two operands of the result vector type plus an i32 operand.
def SDTARMVEXT
  : SDTypeProfile<1, 3, [SDTCisVec<0>,
                         SDTCisSameAs<0, 1>,
                         SDTCisSameAs<0, 2>,
                         SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Two-result shuffles: both results and both operands share one vector type.
def SDTARMVSHUF2
  : SDTypeProfile<2, 2, [SDTCisVec<0>,
                         SDTCisSameAs<0, 1>,
                         SDTCisSameAs<0, 2>,
                         SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Table lookups: the result and every operand are v8i8.
def SDTARMVTBL1
  : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>,
                         SDTCisVT<1, v8i8>,
                         SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2
  : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>,
                         SDTCisVT<1, v8i8>,
                         SDTCisVT<2, v8i8>,
                         SDTCisVT<3, v8i8>]>;
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// The pattern selects v2f64 loads matched by word_alignedload.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst),
                   (ins GPR:$Rn),
                   IIC_fpLoad_m,
                   "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
// The pattern selects v2f64 stores matched by word_alignedstore.
def VSTMQIA
  : PseudoVFPLdStM<(outs),
                   (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m,
                   "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Pseudos producing a single QPR result.  The plain form takes only an
// addrmode6 address.  The WB forms add a GPR:$wb output and pass
// "$addr.addr = $wb" as the last template argument (presumably the operand
// constraint string - confirm against PseudoNLdSt).
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
// Writeback with an am6offset operand.
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
// Writeback with no offset operand.
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
// Writeback with an rGPR offset operand.
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// Same four shapes as above, producing a QQPR result.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// QQQQPR forms.  Unlike the pseudos above, these also take a QQQQPR:$src
// input and pass "$src = $dst" in the last template argument.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

// Everything from here to the matching closing brace (not visible in this
// part of the file) is defined with mayLoad = 1, hasSideEffects = 0 and
// hasExtraDefRegAllocReq = 1.
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

//   VLD1     : Vector Load (multiple single elements)
// The non-writeback classes set Rm = 0b1111 and copy low bits of Rn into
// Inst (bit 4 for VLD1D, bits 5-4 for VLD1Q).  Bits written as ? in the
// op7_4 arguments below are left unset by the defs (presumably the same
// bits that the Inst assignments fill in - confirm against NLdSt).
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def  VLD1d8   : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def  VLD1d16  : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def  VLD1d32  : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def  VLD1d64  : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def  VLD1q8   : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def  VLD1q16  : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def  VLD1q32  : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def  VLD1q64  : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// Each writeback multiclass yields a _fixed def ("$Rn!" syntax, Rm forced to
// 0b1101) and a _register def that takes an explicit rGPR:$Rm operand.
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): both writeback defs below use the IIC_VLD1x2u itinerary,
// whereas the non-writeback three-register class uses IIC_VLD1x3 - confirm
// that this is intended.
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T      : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T     : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T     : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T     : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

// Three-register pseudos built on the QQPR pseudo classes.
def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d8TPseudoWB_fixed  : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d8TPseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// Three-register pseudos built on the QQQQPR pseudo classes.  Only the
// "High" variants have a non-writeback form.
def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): both writeback defs below use the IIC_VLD1x2u itinerary,
// whereas the non-writeback four-register class uses IIC_VLD1x4 - confirm
// that this is intended.
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

// Four-register pseudos built on the QQPR pseudo classes.
def VLD1d8QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d8QPseudoWB_fixed  : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d8QPseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// Four-register pseudos built on the QQQQPR pseudo classes.  Only the
// "High" variants have a non-writeback form.
def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

//   VLD2     : Vector Load (multiple 2-element structures)
// Unlike the VLD1 classes, VLD2 takes op11_8, the destination list operand
// and the itinerary as parameters and attaches no Sched itself; each def
// below supplies its own Sched list.
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def  VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;
def  VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;
def  VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;

def  VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def  VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def  VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD,
IIC_VLD2x2, 820 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 821 822def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 823def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 824def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 825 826// ...with address register writeback: 827multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, 828 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { 829 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 830 (ins AddrMode:$Rn), itin, 831 "vld2", Dt, "$Vd, $Rn!", 832 "$Rn.addr = $wb", []> { 833 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 834 let Inst{5-4} = Rn{5-4}; 835 let DecoderMethod = "DecodeVLDST2Instruction"; 836 } 837 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 838 (ins AddrMode:$Rn, rGPR:$Rm), itin, 839 "vld2", Dt, "$Vd, $Rn, $Rm", 840 "$Rn.addr = $wb", []> { 841 let Inst{5-4} = Rn{5-4}; 842 let DecoderMethod = "DecodeVLDST2Instruction"; 843 } 844} 845 846defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, 847 addrmode6align64or128>, Sched<[WriteVLD2]>; 848defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, 849 addrmode6align64or128>, Sched<[WriteVLD2]>; 850defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, 851 addrmode6align64or128>, Sched<[WriteVLD2]>; 852 853defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, 854 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 855defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, 856 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 857defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, 858 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 859 860def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 861def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 862def 
VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 863def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 864def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 865def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 866 867// ...with double-spaced registers 868def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 869 addrmode6align64or128>, Sched<[WriteVLD2]>; 870def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 871 addrmode6align64or128>, Sched<[WriteVLD2]>; 872def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 873 addrmode6align64or128>, Sched<[WriteVLD2]>; 874defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 875 addrmode6align64or128>, Sched<[WriteVLD2]>; 876defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 877 addrmode6align64or128>, Sched<[WriteVLD2]>; 878defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 879 addrmode6align64or128>, Sched<[WriteVLD2]>; 880 881// VLD3 : Vector Load (multiple 3-element structures) 882class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 883 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 884 (ins addrmode6:$Rn), IIC_VLD3, 885 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { 886 let Rm = 0b1111; 887 let Inst{4} = Rn{4}; 888 let DecoderMethod = "DecodeVLDST3Instruction"; 889} 890 891def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 892def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 893def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 894 895def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 896def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 897def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 898 899// ...with address register writeback: 900class VLD3DWB<bits<4> op11_8, bits<4> 
op7_4, string Dt> 901 : NLdSt<0, 0b10, op11_8, op7_4, 902 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 903 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 904 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 905 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 906 let Inst{4} = Rn{4}; 907 let DecoderMethod = "DecodeVLDST3Instruction"; 908} 909 910def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 911def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 912def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 913 914def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 915def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 916def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 917 918// ...with double-spaced registers: 919def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 920def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 921def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 922def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 923def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 924def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 925 926def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 927def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 928def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 929 930// ...alternate versions to be allocated odd register numbers: 931def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 932def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 933def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 934 935def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 936def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 937def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 938 939// VLD4 : Vector Load (multiple 4-element structures) 940class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 941 : NLdSt<0, 0b10, op11_8, op7_4, 942 (outs DPR:$Vd, 
DPR:$dst2, DPR:$dst3, DPR:$dst4), 943 (ins addrmode6:$Rn), IIC_VLD4, 944 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, 945 Sched<[WriteVLD4]> { 946 let Rm = 0b1111; 947 let Inst{5-4} = Rn{5-4}; 948 let DecoderMethod = "DecodeVLDST4Instruction"; 949} 950 951def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 952def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 953def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 954 955def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 956def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 957def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 958 959// ...with address register writeback: 960class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 961 : NLdSt<0, 0b10, op11_8, op7_4, 962 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 963 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 964 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 965 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 966 let Inst{5-4} = Rn{5-4}; 967 let DecoderMethod = "DecodeVLDST4Instruction"; 968} 969 970def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 971def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 972def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 973 974def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 975def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 976def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 977 978// ...with double-spaced registers: 979def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 980def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 981def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 982def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 983def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 984def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 985 986def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 987def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 988def VLD4q32Pseudo_UPD : 
VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 989 990// ...alternate versions to be allocated odd register numbers: 991def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 992def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 993def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 994 995def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 996def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 997def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 998 999} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1000 1001// Classes for VLD*LN pseudo-instructions with multi-register operands. 1002// These are expanded to real instructions after register allocation. 1003class VLDQLNPseudo<InstrItinClass itin> 1004 : PseudoNLdSt<(outs QPR:$dst), 1005 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1006 itin, "$src = $dst">; 1007class VLDQLNWBPseudo<InstrItinClass itin> 1008 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 1009 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1010 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1011class VLDQQLNPseudo<InstrItinClass itin> 1012 : PseudoNLdSt<(outs QQPR:$dst), 1013 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1014 itin, "$src = $dst">; 1015class VLDQQLNWBPseudo<InstrItinClass itin> 1016 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 1017 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1018 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1019class VLDQQQQLNPseudo<InstrItinClass itin> 1020 : PseudoNLdSt<(outs QQQQPR:$dst), 1021 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1022 itin, "$src = $dst">; 1023class VLDQQQQLNWBPseudo<InstrItinClass itin> 1024 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 1025 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1026 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1027 1028// VLD1LN : Vector Load (single element to one 
// lane)
// VLD1LN: loads one element through LoadOp and inserts it into lane $lane of
// the tied $src register (selection pattern uses vector_insert).
// NOTE(review): unlike VLD1LN32 below, no Sched<> list is attached here --
// confirm that is intended.
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but with the addrmode6oneL32 address operand.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register pseudo carrying the equivalent vector_insert pattern.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
                                                    Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

// The lane index occupies progressively fewer of Inst{7-5} as the element
// size grows; the remaining low bits are taken from the address operand.
def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

let Predicates = [HasNEON] in {
// Floating-point lane loads reuse the integer instructions of the same width.
def : Pat<(vector_insert (v4f16 DPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8f16 QPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4bf16 DPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8bf16 QPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
}


let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // Both bits are driven from Rn{4} here, whereas the non-writeback VLD1LNd32
  // copies Rn{5-4}. NOTE(review): presumably the 32-bit form only allows the
  // two bits to be equal -- verify against the architecture manual.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
// Each destination register is tied to the matching $srcN input.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with address register writeback:
// NOTE(review): no Sched<> list is attached to this class, unlike VLD2LN --
// confirm that is intended.
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
// No bits are copied from Rn in this class; every def below fixes bit 4 of
// op7_4 to 0.
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []>, Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};  // The 32-bit form also copies bit 5 of the address operand.
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// NOTE(review): no Sched<> list is attached to this class, unlike VLD4LN --
// confirm that is intended.
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
// Selected for an ARMvdup of a scalar loaded through LoadOp; the destination
// is a one-register all-lanes list operand.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

let Predicates = [HasNEON] in {
// f32 splat-from-memory reuses the 32-bit integer instruction.
def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
}

// Two-register (VecListDPairAllLanes) counterpart of VLD1DUP.
// NOTE(review): no Sched<> list is attached here, unlike VLD1DUP -- confirm.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

let Predicates = [HasNEON] in {
def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
}

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
// `_fixed` prints "$Rn!" and hard-wires Rm; `_register` takes rGPR:$Rm.
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
// NOTE(review): only `_fixed` carries a Sched<> list in this multiclass --
// confirm whether `_register` should match.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
                              addrmode6dupalign16>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                              addrmode6dupalign32>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                              addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// NOTE(review): these writeback pseudos use IIC_VLD2dup while the VLD2DUPWB
// multiclass above uses IIC_VLD2dupu -- confirm that is intended.
def VLD2DUPq8OddPseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudoWB_register  : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
// Bit 4 is forced to 0 here instead of being copied from the address operand.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

// VLD3DUP instances; the last op7_4 bit is left unset ('?').
def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// Adds a GPR:$wb result tied to the base ("$Rn.addr = $wb") and an
// am6offset:$Rm input that is printed directly after $Rn. Inst{4} is forced
// to 0, as in the non-writeback VLD3DUP class.
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

// Writeback pseudos use the writeback itinerary IIC_VLD3dupu.
def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

def VLD3DUPq8OddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
// Four explicit DPR results; Rm is fixed to 0b1111 and Inst{4} is taken from
// Rn{4}.
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

// The 32-bit form leaves op7_4 bit 6 unset ('?') and fills it from Rn{5}.
// NOTE(review): presumably Rn{5-4} carry the alignment encoded by the address
// operand -- confirm against the addrmode6dup encoder.
def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// Adds a GPR:$wb result tied to the base ("$Rn.addr = $wb") and an
// am6offset:$Rm input that is printed directly after $Rn.
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// Writeback pseudos use the writeback itinerary IIC_VLD4dupu.
def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

def VLD4DUPq8OddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Naming used by the pseudo classes below:
//   Q / QQ / QQQQ : the source is a QPR / QQPR / QQQQPR register operand.
//   WB            : defines GPR:$wb tied to the base ("$addr.addr = $wb") and
//                   takes an am6offset:$offset.
//   WBfixed       : as WB, but with no offset operand.
//   WBregister    : as WB, but the offset is an rGPR.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
// VST1D stores a one-D-register list; Inst{4} comes from Rn{4}.
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// VST1Q stores a D-register-pair list; Inst{5-4} come from Rn{5-4}.
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def  VST1d8   : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def  VST1d16  : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def  VST1d32  : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def  VST1d64  : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def  VST1q8   : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def  VST1q16  : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def  VST1q32  : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def  VST1q64  : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
//   _fixed    : printed as "$Vd, $Rn!"; Rm is hard-wired to 0b1101.
//   _register : printed as "$Vd, $Rn, $Rm"; the increment comes from rGPR:$Rm.
// These are stores, so they use the store writeback itinerary IIC_VST1u
// rather than the load class IIC_VLD1u.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VST1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VST1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
// D-register-pair writeback stores: store itinerary IIC_VST1x2u (was the load
// class IIC_VLD1x2u).
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VST1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VST1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// Writeback forms use IIC_VST1x3u, the itinerary the matching
// VST1d*TPseudoWB_* pseudos below already use (was the load class
// IIC_VLD1x3u).
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VST1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VST1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T     : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T    : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T    : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T    : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d8TPseudo             : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo     : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

// The _UPD pseudos are writeback forms (VSTQQQQWBPseudo), so they use the
// writeback itinerary IIC_VST1x3u like the VST1d*TPseudoWB_* pseudos above.
def VST1q8HighTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// Writeback forms use IIC_VST1x4u, the itinerary the matching
// VST1d*QPseudoWB_* pseudos below already use (was the load class
// IIC_VLD1x4u).
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q     : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q    : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q    : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q    : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d8QPseudo             : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo     : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

// The _UPD pseudos are writeback forms (VSTQQQQWBPseudo), so they use the
// writeback itinerary IIC_VST1x4u like the VST1d*QPseudoWB_* pseudos above.
def VST1q8HighQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

// VST2 : Vector Store (multiple 2-element structures)
//   op11_8, op7_4 - values for the corresponding NLdSt fields.
//   VdTy          - register-list operand used for $Vd.
//   itin          - itinerary class, chosen per instance.
//   AddrMode      - address operand used for $Rn.
// The class attaches no Sched<>; instances add their own where they have one.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
            InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def  VST2d8   : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;
def  VST2d16  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;
def  VST2d32  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                     addrmode6align64or128>, Sched<[WriteVST2]>;

def  VST2q8   : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
def  VST2q16  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
def  VST2q32  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>, Sched<[WriteVST4]>;

def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;

// ...with address register writeback:
// Store writeback itinerary IIC_VST2u (was the load class IIC_VLD1u).
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VST2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VST2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
// Four-D-register writeback stores use IIC_VST2x2u, the itinerary the
// VST2q*PseudoWB_* pseudos below already use (was the load class IIC_VLD1u).
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST2x2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb    : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                           addrmode6align64or128>;
defm VST2d16wb   : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                           addrmode6align64or128>;
defm VST2d32wb   : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                           addrmode6align64or128>;

defm VST2q8wb    : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST2q16wb   : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb   : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;

// ...with double-spaced registers
// NOTE(review): unlike VST2d8/16/32, these three defs attach no Sched<>;
// confirm whether that is intentional.
def VST2b8      : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                       addrmode6align64or128>;
def VST2b16     : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                       addrmode6align64or128>;
def VST2b32     : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                       addrmode6align64or128>;
defm VST2b8wb   : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                          addrmode6align64or128>;
defm VST2b16wb  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                          addrmode6align64or128>;
defm VST2b32wb  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                          addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
// Three explicit DPR sources; Inst{4} comes from Rn{4}.
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;

def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...with double-spaced registers:
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// VST4 : Vector Store (multiple 4-element structures)
// Four explicit DPR sources; Inst{5-4} come from Rn{5-4}.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;

def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...with double-spaced registers:
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each pseudo takes the address, a Q/QQ/QQQQ source register and a lane
// immediate. The WB variants additionally take an am6offset and define
// GPR:$wb, tied to the base via "$addr.addr = $wb".
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
//   Ty        - vector type of the DPR source in the selection pattern.
//   StoreOp   - store fragment applied to the extracted element.
//   ExtractOp - node that extracts lane $lane from the vector.
//   AddrMode  - address operand used for $Rn.
// The pattern matches StoreOp(ExtractOp(Vd, lane), Rn). Rm is fixed to 0b1111.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
     Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register counterpart: same pattern shape, applied to QPR:$src.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

// The lane index fills the op7_4 bits left unset ('?'): three bits for 8-bit
// elements, two for 16-bit, one for 32-bit. The 16- and 32-bit forms also
// copy Rn{4} / Rn{5-4} into the low instruction bits.
def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Floating-point lane stores select the same-width integer instructions
// defined above.
let Predicates = [HasNEON] in {
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
}

// ...with address register writeback:
// The pattern sets GPR:$wb from StoreOp(ExtractOp(Vd, lane), Rn, Rm).
// (The address-operand parameter is spelled AdrMode in this class.)
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins AdrMode:$Rn, am6offset:$Rm,
           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb",
          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                  AdrMode:$Rn, am6offset:$Rm))]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

// The definitions from here on carry no selection patterns, so the store
// flags are set explicitly.
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
// Two explicit DPR sources; Rm is fixed to 0b1111 and Inst{4} comes from
// Rn{4}.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
          "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
// (These two defs repeat the Inst{4} assignment that the VST2LN class
// already makes.)
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
// NOTE(review): VST2LN and the VST2LN*Pseudo_UPD defs carry
// Sched<[WriteVST1]>, but this class attaches no Sched<>; confirm whether
// that is intentional.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
// Three explicit DPR sources; Rm is fixed to 0b1111.
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16
: VST3LN<0b0110, {?,?,0,0}, "16"> { 2326 let Inst{7-6} = lane{1-0}; 2327} 2328def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2329 let Inst{7} = lane{0}; 2330} 2331 2332def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2333def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2334def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2335 2336// ...with double-spaced registers: 2337def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2338 let Inst{7-6} = lane{1-0}; 2339} 2340def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2341 let Inst{7} = lane{0}; 2342} 2343 2344def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2345def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2346 2347// ...with address register writeback: 2348class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2349 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2350 (ins addrmode6:$Rn, am6offset:$Rm, 2351 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2352 IIC_VST3lnu, "vst3", Dt, 2353 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2354 "$Rn.addr = $wb", []> { 2355 let DecoderMethod = "DecodeVST3LN"; 2356} 2357 2358def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2359 let Inst{7-5} = lane{2-0}; 2360} 2361def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2362 let Inst{7-6} = lane{1-0}; 2363} 2364def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2365 let Inst{7} = lane{0}; 2366} 2367 2368def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2369def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2370def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2371 2372def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2373 let Inst{7-6} = lane{1-0}; 2374} 2375def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2376 let Inst{7} = lane{0}; 2377} 2378 2379def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2380def VST3LNq32Pseudo_UPD 
: VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2381 2382// VST4LN : Vector Store (single 4-element structure from one lane) 2383class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2384 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2385 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2386 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2387 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2388 "", []>, Sched<[WriteVST2]> { 2389 let Rm = 0b1111; 2390 let Inst{4} = Rn{4}; 2391 let DecoderMethod = "DecodeVST4LN"; 2392} 2393 2394def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2395 let Inst{7-5} = lane{2-0}; 2396} 2397def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2398 let Inst{7-6} = lane{1-0}; 2399} 2400def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2401 let Inst{7} = lane{0}; 2402 let Inst{5} = Rn{5}; 2403} 2404 2405def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2406def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2407def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2408 2409// ...with double-spaced registers: 2410def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2411 let Inst{7-6} = lane{1-0}; 2412} 2413def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2414 let Inst{7} = lane{0}; 2415 let Inst{5} = Rn{5}; 2416} 2417 2418def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2419def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2420 2421// ...with address register writeback: 2422class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2423 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2424 (ins addrmode6:$Rn, am6offset:$Rm, 2425 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2426 IIC_VST4lnu, "vst4", Dt, 2427 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2428 "$Rn.addr = $wb", []> { 2429 let Inst{4} = Rn{4}; 2430 let DecoderMethod = "DecodeVST4LN"; 2431} 2432 2433def VST4LNd8_UPD : 
VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2434 let Inst{7-5} = lane{2-0}; 2435} 2436def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2437 let Inst{7-6} = lane{1-0}; 2438} 2439def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2440 let Inst{7} = lane{0}; 2441 let Inst{5} = Rn{5}; 2442} 2443 2444def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2445def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2446def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2447 2448def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2449 let Inst{7-6} = lane{1-0}; 2450} 2451def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2452 let Inst{7} = lane{0}; 2453 let Inst{5} = Rn{5}; 2454} 2455 2456def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2457def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2458 2459} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2460 2461// Use vld1/vst1 for unaligned f64 load / store 2462let Predicates = [IsLE,HasNEON] in { 2463def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2464 (VLD1d16 addrmode6:$addr)>; 2465def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2466 (VST1d16 addrmode6:$addr, DPR:$value)>; 2467def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2468 (VLD1d8 addrmode6:$addr)>; 2469def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2470 (VST1d8 addrmode6:$addr, DPR:$value)>; 2471} 2472let Predicates = [IsBE,HasNEON] in { 2473def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2474 (VLD1d64 addrmode6:$addr)>; 2475def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2476 (VST1d64 addrmode6:$addr, DPR:$value)>; 2477} 2478 2479// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2480// load / store if it's legal. 
// Doubleword-aligned v2f64 accesses map onto the 64-bit element vld1/vst1
// whenever NEON is available.
let Predicates = [HasNEON] in {
  def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
            (VLD1q64 addrmode6:$addr)>;
  def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q64 addrmode6:$addr, QPR:$value)>;
}

// Less-aligned v2f64 accesses are only matched under IsLE; the element size
// of the chosen vld1/vst1 follows the alignment of the access.
let Predicates = [IsLE, HasNEON] in {
  // Word aligned.
  def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
            (VLD1q32 addrmode6:$addr)>;
  def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q32 addrmode6:$addr, QPR:$value)>;

  // Halfword aligned.
  def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
            (VLD1q16 addrmode6:$addr)>;
  def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q16 addrmode6:$addr, QPR:$value)>;

  // Byte aligned.
  def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
            (VLD1q8 addrmode6:$addr)>;
  def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q8 addrmode6:$addr, QPR:$value)>;
}

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// D-register form: selects (ResTy (OpNode (OpTy $Vm))) with the fixed
// IIC_VUNAD itinerary.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;

// Q-register form: same shape on QPR operands with the IIC_VUNAQ itinerary.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Unlike N2VD/N2VQ, the itinerary is a parameter and the operator is an
// SDPatternOperator.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// D-register form built on N2Vnp.
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

// Q-register form built on N2Vnp.
class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
// Here op6 is a parameter instead of the constant 1 used by N2VQIntnp.
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
// The extra $src input is tied to $Vd and is the first operand of IntOp.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd),
          (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// Q-register source, D-register result.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// D-register source, Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: $src1 is tied to $Vd and $src2 to $Vm.
// No selection pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd),
         (ins DPR:$Vn, DPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// D-register by-lane form: the second source is lane $lane of a DPR_VFP2
// register, duplicated with ARMvduplane.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                               imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// As N3VDSL, but the lane register is a DPR_8 indexed by VectorIndex16 and
// the itinerary is fixed to IIC_VMULi16D.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                               imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Q-register counterpart of N3VD.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Q-register counterpart of N3VDX (no data type).
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd),
         (ins QPR:$Vn, QPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Q-register by-lane form: the lane comes from a D register of type OpTy and
// is duplicated to ResTy.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// As N3VQSL, but with a DPR_8 lane register, a VectorIndex16 index and the
// fixed IIC_VMULi16Q itinerary.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Basic 3-register intrinsics, both double- and quad-register.
// D-register form; the instruction Format is a parameter (f).
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// D-register form built on N3Vnp; no TwoOperandAliasConstraint is set.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}


// D-register by-lane intrinsic: second source is a duplicated lane of a
// DPR_VFP2 register.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// As N3VDIntSL, with a DPR_8 lane register and a VectorIndex16 index.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Variant with $Vm before $Vn in the operand list, the assembly string and
// the pattern; the two-operand alias ties $Vm (not $Vn) to $Vd.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm, DPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let isCommutable = 0;
  let TwoOperandAliasConstraint = "$Vm = $Vd";
}

// Q-register counterpart of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Q-register counterpart of N3VDIntnp.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as N3VQIntnp but with Vd as a src register.
// The extra $src input is tied to $Vd and is the first operand of IntOp.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd),
          (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  let isCommutable = 0;
  let Constraints = "$src = $Vd";
}

// Q-register by-lane intrinsic: the lane comes from a DPR_VFP2 register of
// type OpTy and is duplicated to ResTy.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}

// As N3VQIntSL, with a DPR_8 lane register and a VectorIndex16 index.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}

// Q-register variant with $Vm before $Vn in the operand list, the assembly
// string and the pattern; the two-operand alias ties $Vm to $Vd.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm, QPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
  let TwoOperandAliasConstraint = "$Vm = $Vd";
}

// Multiply-Add/Sub operations: double- and quad-register.
// The accumulator $src1 is tied to $Vd; the pattern is
// OpNode($src1, MulOp($Vn, $Vm)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                                   (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// By-lane form: the multiplier is a duplicated lane of a DPR_VFP2 register.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                          imm:$lane)))))))]>;

// As N3VDMulOpSL, with a DPR_8 lane register and a VectorIndex16 index.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                          imm:$lane)))))))]>;

// Q-register counterpart of N3VDMulOp.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                                   (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;

// Q-register by-lane form: the lane comes from a DPR_VFP2 register of type
// OpTy and is duplicated to ResTy.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                                   imm:$lane)))))))]>;

// As N3VQMulOpSL, with a DPR_8 lane register and a VectorIndex16 index.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                                   imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// The accumulator $src1 is tied to $Vd; the pattern is
// OpNode($src1, IntOp($Vn, $Vm)).
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                                   (Ty (IntOp (Ty DPR:$Vn),
                                              (Ty DPR:$Vm))))))]>;

// Q-register counterpart of N3VDIntOp.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                                   (Ty (IntOp (Ty QPR:$Vn),
                                              (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// $src1 is tied to $Vd and is passed as the first of IntOp's three operands.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Q-register counterpart of N3VDInt3.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator ($src1 tied to $Vd) combined by OpNode with the
// TyQ result of MulOp on two D-register sources.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;

// By-lane form: the second multiplicand is a duplicated lane of a DPR_VFP2
// register.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                          imm:$lane))))))]>;
// Long multiply-accumulate by lane using the N3VLane16 base: $Vm is drawn
// from DPR_8 and the lane operand is VectorIndex16.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD,
                    SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane
                                               (TyD DPR_8:$Vm),
                                               imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Pattern: OpNode($src1, ExtOp(IntOp($Vn, $Vm))).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4, InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                              (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
               bit op4, InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$Vn),
                          (TyD DPR:$Vm))))]>;

// By-lane long 3-argument intrinsic on the N3VLane32 base: the last
// intrinsic operand is a lane of $Vm (DPR_VFP2) duplicated via ARMvduplane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane
                                          (OpTy DPR_VFP2:$Vm),
                                          imm:$lane)))))]>;

// Same as N3VLInt3SL but on the N3VLane16 base, with $Vm in DPR_8 and a
// VectorIndex16 lane operand.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane
                                          (OpTy DPR_8:$Vm),
                                          imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Two Q-register sources of type TyQ produce a D-register result of type
// TyD.  The itinerary is fixed to IIC_VBINi4D rather than parameterised.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
              bit op4,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources of type TyD produce a Q-register result of type TyQ.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
           bit op4, InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// By-lane long operation on the N3VLane32 base ($Vm in DPR_VFP2,
// VectorIndex32 lane).
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                   imm:$lane)))))]>;

// By-lane long operation on the N3VLane16 base ($Vm in DPR_8,
// VectorIndex16 lane).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                   imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Pattern: OpNode(ExtOp($Vn), ExtOp($Vm)); each D-register source is
// extended to TyQ before OpNode is applied.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
              bit op4, InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDNode OpNode, SDPatternOperator ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Pattern: ExtOp(IntOp($Vn, $Vm)); the intrinsic runs at TyD and its
// result is then extended to TyQ.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                 bit op4, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                      (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
              bit op4, InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Built on N3Vnp, which takes op27_23/op6 encoding fields and no asm operand
// or constraint strings.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                bit op6, bit op4, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}


// By-lane long intrinsic on the N3VLane32 base ($Vm in DPR_VFP2,
// VectorIndex32 lane).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane
                                          (OpTy DPR_VFP2:$Vm),
                                          imm:$lane)))))]>;

// By-lane long intrinsic on the N3VLane16 base ($Vm in DPR_8,
// VectorIndex16 lane).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane
                                          (OpTy DPR_8:$Vm),
                                          imm:$lane)))))]>;

// Wide 3-register operations.
// Pattern: OpNode($Vn, ExtOp($Vm)); $Vn is already a Q register, only the
// D-register $Vm is extended.  The itinerary is fixed to IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
           bit op4,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDPatternOperator ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR:$Vm),
        N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$Vn),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// Both variants use the IIC_VSHLiD itinerary.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// D-register form (itinerary IIC_VPALiD).
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vm),
        IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;

// Q-register form (itinerary IIC_VPALiQ).
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vm),
        IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
// Pattern: OpNode($Vm, i32 immediate $SIMM).  Both classes are defined under
// a two-operand alias constraint tying $Vm to $Vd.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// D-register source, Q-register result.  The format is fixed to N2RegVShLFrm
// and the itinerary to IIC_VSHLiD; the pattern matches the ImmTy operand
// directly rather than a generic (i32 imm).
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
             bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd),
           (ins DPR:$Vm, ImmTy:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate.
// Q-register source, D-register result; the format is fixed to N2RegVShRFrm.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
             bit op4, InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vm),
                                (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Pattern: add($src1, ShOp($Vm, imm)), with $src1 tied to $Vd.  Both the D
// and Q variants use format N2RegVShRFrm and itinerary IIC_VPALiD.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
// Pattern: ShOp($src1, $Vm, imm), with $src1 tied to $Vd.  The D variant
// uses itinerary IIC_VSHLiD, the Q variant IIC_VSHLiQ.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// The immediate operand is always neon_vcvt_imm32 and the format NVCVTFrm.
// D variant uses itinerary IIC_VUNAD, Q variant IIC_VUNAQ.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
// Each def matches ARMvcmpz of $Vm with condition `fc`.  The integer defs
// append the element size to Dt; the floating-point defs use a literal
// "f32"/"f16" data type, set Inst{10}, and produce an integer vector result.
// The f16 defs additionally require HasNEON and HasFullFP16.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4,
                       string opc, string Dt, string asm, PatFrag fc> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "8", asm, "",
                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "8", asm, "",
                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register comparisons.
// Pattern: ARMvcmp($Vn, $Vm, fc).  Q-register form first, then D-register.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
               bit op4, InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
               bit op4, InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Integer 3-register comparisons for 8-, 16- and 32-bit elements.  The
// 8- and 16-bit defs share the *16 itineraries; 32-bit defs use the *32 ones.
multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       PatFrag fc, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "8",
                       v8i8, v8i8, fc, Commutable>;
  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "16",
                       v4i16, v4i16, fc, Commutable>;
  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, Dt # "32",
                       v2i32, v2i32, fc, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "8",
                       v16i8, v16i8, fc, Commutable>;
  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "16",
                       v8i16, v8i16, fc, Commutable>;
  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, Dt # "32",
                       v4i32, v4i32, fc, Commutable>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "8",
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "16",
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "32",
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "8",
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "16",
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "32",
                      v4i32, v4i32, IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrow) result type; the Dt suffix carries the
// (wide) source element size.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "16",
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "32",
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "64",
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "16",
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "32",
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "64",
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits (the Dt suffixes and
//   the v8i8/v4i16/v2i32 source types below); defs are named after the
//   widened result type.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, Dt # "8",
                   v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, Dt # "16",
                   v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, Dt # "32",
                   v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, Dt # "8",
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, Dt # "16",
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, Dt # "32",
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "8",
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "16",
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, Dt # "32",
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-lane operations for 16- and 32-bit elements; the data type strings are
// the literals "i16"/"i32" and the 32-bit itineraries are fixed.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8,
                       OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8,
                       OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     OpcodeStr, "i32", v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// Inherits the 8/16/32-bit defs from N3V_QHS, passing itinD/itinQ for both
// the 16- and 32-bit itinerary slots.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, Dt # "64",
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, Dt # "64",
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, Dt # "16",
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "32",
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "16",
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "32",
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same shape as N3VInt_HS, but built from N3VDIntSh/N3VQIntSh and without a
// Commutable parameter.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "16",
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "32",
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "16",
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "32",
                        v4i32, v4i32, IntOp>;
}

// By-lane intrinsics for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, Dt # "16", v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, Dt # "32", v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, Dt # "16", v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit defs reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, Dt # "8",
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "8",
                      v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "8",
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "8",
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
// The 64-bit defs reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "64",
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "64",
                      v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "64",
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "64",
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, Dt # "16",
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, Dt # "32",
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, Dt # "64",
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

// Defs are named after the widened result type; the Dt suffix carries the
// source element size.  8- and 16-bit sources share itin16.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, Dt # "8",
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, Dt # "16",
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, Dt # "32",
                   v2i64, v2i32, OpNode, Commutable>;
}

// By-lane long operations; unlike N3VL_QHS the defs here are named after the
// source type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin,
                       OpcodeStr, Dt # "16",
                       v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin,
                     OpcodeStr, Dt # "32",
                     v2i64, v2i32, OpNode>;
}

multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDPatternOperator ExtOp,
                       bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8",
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16",
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32",
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16",
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32",
                      v2i64, v2i32, IntOp, Commutable>;
}

// By-lane long intrinsics; defs are named after the source type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, Dt # "16",
                          v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, Dt # "32",
                        v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8",
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  // One itinerary is shared by all three source element sizes.
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, Dt # "8",
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, Dt # "16",
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, Dt # "32",
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector operations,
//   source operand element sizes of 8, 16 and 32 bits.
// Takes an SDNode (OpNode) plus an extend operator (ExtOp); no itinerary
// parameter is passed through to N3VW.
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDPatternOperator ExtOp,
                    bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, Dt # "8",
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, Dt # "16",
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, Dt # "32",
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits.
// The multiply operator is fixed to `mul`; OpNode is the second operator
// handed to N3V{D,Q}MulOp. 8-bit records reuse the 16-bit itineraries.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, Dt # "8", v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, Dt # "16", v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, Dt # "32", v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, Dt # "8", v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, Dt # "16", v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, mul, OpNode>;
}

// Multiply-Op records built from the N3V{D,Q}MulOpSL[16] classes,
//   element sizes of 16 and 32 bits. The Q-register records also pass the
//   matching D-register vector type.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, Dt # "16", v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, Dt # "32", v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, Dt # "16", v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, Dt # "32", v4i32, v2i32, mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits.
// A single D itinerary and a single Q itinerary cover every element size.
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, Dt # "8", v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, Dt # "16", v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, Dt # "32", v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, Dt # "8", v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, Dt # "16", v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, Dt # "32", v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits (N3VInt3_HS plus the 8-bit records,
//   which reuse the 16-bit itineraries):
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
    : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
                 itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits.
// Here the multiply operator is a parameter (MulOp) rather than fixed.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16,
                        OpcodeStr, Dt # "8", v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16,
                        OpcodeStr, Dt # "16", v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32,
                        OpcodeStr, Dt # "32", v2i64, v2i32, MulOp, OpNode>;
}

// Long Multiply-Op records built from the N3VLMulOpSL[16] classes. The
// itineraries are hard-coded (IIC_VMACi16D / IIC_VMACi32D) rather than
// being template parameters.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D,
                            OpcodeStr, Dt # "16",
                            v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D,
                          OpcodeStr, Dt # "32",
                          v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// Source element sizes of 16 and 32 bits only:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, Dt # "16", v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, Dt # "32", v2i64, v2i32, IntOp>;
}

// Long 3-argument intrinsics built from the N3VLInt3SL[16] classes, with
// hard-coded itineraries (IIC_VMACi16D / IIC_VMACi32D).
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, Dt # "16", v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, Dt # "32", v2i64, v2i32, IntOp>;
}

// N3VLInt3_HS plus the 8-bit source element record (uses itin16):
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
    : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                  OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, Dt # "8", v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  // One itinerary is shared by all three source element sizes.
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, Dt # "8", v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, Dt # "16", v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, Dt # "32", v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits.
// Records are named after the source type; the first type argument is the
// result type, with half as many elements of twice the width.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, Dt # "8", v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, Dt # "16", v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, Dt # "32", v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, Dt # "8", v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, Dt # "16", v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, Dt # "32", v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits.
// Same shape as N2VPLInt_QHS, instantiating the N2V{D,Q}PLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, Dt # "8", v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, Dt # "16", v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, Dt # "32", v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, Dt # "8", v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, Dt # "16", v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, Dt # "32", v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   element sizes of 8, 16, 32 and 64 bits.
// Two multiclasses follow: N2VShL_QHSD uses the N2RegVShLFrm format with an
// i32imm shift operand, N2VShR_QHSD uses N2RegVShRFrm with the shr_imm<N>
// operands. In both, the 8/16/32-bit records pin the leading bits of imm6
// through Inst{21-19}; the 64-bit records pass 1 (instead of 0) as the
// fourth class argument and leave imm6 unconstrained.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "8", v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "16", v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "32", v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "64", v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "8", v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "16", v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "32", v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, Dt # "64", v2i64, OpNode>;
                             // imm6 = xxxxxx
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, Dt # "8", v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, Dt # "16", v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, Dt # "32", v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, Dt # "64", v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, Dt # "8", v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, Dt # "16", v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, Dt # "32", v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, Dt # "64", v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits.
// All records take the shr_imm<N> operand for their element size.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, Dt # "8", v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, Dt # "16", v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, Dt # "32", v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, Dt # "64", v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, Dt # "8", v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, Dt # "16", v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, Dt # "32", v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, Dt # "64", v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   element sizes of 8, 16, 32 and 64 bits.
// N2VShInsL_QHSD: N2RegVShLFrm format, i32imm operand, NEONvsliImm node.
// N2VShInsR_QHSD: N2RegVShRFrm format, shr_imm<N> operands, NEONvsriImm node.
// Neither takes a Dt parameter; the data-type string is the bare size.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                             // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits.
// Records are named after the wide result type; the shift operand is
// imm1_7 / imm1_15 / imm1_31 according to the source element size.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, Dt # "8", v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, Dt # "16", v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, Dt # "32", v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits.
// Records are named after the narrow result type and take the shr_imm<N>
// operand of that result width, while the data-type suffix names the source
// element size.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, Dt # "16",
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, Dt # "32",
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, Dt # "64",
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
// VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
                         "vadd", "i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// Half-precision forms additionally require HasFullFP16.
def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                     v4f16, v4f16, fadd, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                     v8f16, v8f16, fadd, 1>,
                Requires<[HasNEON, HasFullFP16]>;
// VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zanyext, 1>;
// VADDW    : Vector Add Wide (Q = Q + D)
// Instantiated with Commutable = 0, unlike the other add forms above.
defm VADDWs   : N3VW_QHS<0, 1, 0b0001, 0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1, 1, 0b0001, 0, "vaddw", "u", add, zanyext, 0>;
// VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", saddsat, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", uaddsat, 1>;
// VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
// Instantiated with null_frag; selection is done by the explicit
// trunc/shift/add patterns that follow.
defm VADDHN   : N3VNInt_HSD<0, 1, 0b0100, 0, "vaddhn", "i", null_frag, 1>;
// VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1, 1, 0b0100, 0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Match trunc(lshr(add(Vn, Vm), <half the source element width>)) to VADDHN.
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Multiply Operations.

// VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D,
                        "vmul", "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q,
                        "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                     v4f16, v4f16, fmul, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                     v8f16, v8f16, fmul, 1>,
                Requires<[HasNEON, HasFullFP16]>;
// "sl" forms of VMUL.
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32",
                       v4f32, v2f32, fmul>;
def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16",
                         v8f16, v4f16, fmul>,
                Requires<[HasNEON, HasFullFP16]>;

// Multiply of a Q register by a lane duplicated from another Q register:
// extract the D sub-register that holds the lane and re-index the lane
// within it.
// NOTE(review): the f16 patterns in this group sit under [HasNEON] only,
// while VMULslhd/VMULslhq themselves require HasFullFP16 -- confirm that
// v4f16/v8f16 fmul cannot reach instruction selection without FullFP16.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq (v8f16 QPR:$src1),
                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i16_reg imm:$lane))),
                           (SubReg_i16_lane imm:$lane)))>;

// Multiply by a scalar broadcast with ARMvdup: place the scalar in ssub_0 of
// an otherwise undefined D register and use lane 0.
def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhd DPR:$Rn,
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
            (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhq QPR:$Rn,
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
            (i32 0))>;
}

// VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm,
                          IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;

// Q-register VQDMULH by a lane duplicated from a Q register.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
}

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

// Q-register VQRDMULH by a lane duplicated from a Q register.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
}

// VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
// The records in this group set PostEncoderMethod and DecoderNamespace
// explicitly.
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs   : N3VL_QHS<0, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "s", ARMvmulls, 1>;
  defm VMULLu   : N3VL_QHS<1, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "u", ARMvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D,
                          "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  // The 64-bit polynomial form requires HasV8 and HasAES.
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64",
                            v2i64, v1i64, int_arm_neon_vmullp, 1>,
                  Requires<[HasV8, HasAES]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;

// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0, 1, 0b1101, 0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
// Floating-point forms are gated on UseFPVMLx; the f16 forms additionally
// require HasFullFP16.
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                          v4f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                          v8f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                              v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                              v8f16, v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Integer add-of-multiply with a lane duplicated out of a Q register: the
// lane source is narrowed to a D sub-register (DSubReg_*_reg) and the lane
// index is rewritten (SubReg_*_lane).
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

// Same Q-register lane rewrite for the f32 form; kept outside the block above
// because it carries its own predicate list.
def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0, 1, 0b1000, 0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", ARMvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1, 1, 0b1000, 0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", ARMvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;

let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations.
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
  //            Returning High Half
  // The instructions are defined with null_frag; selection is done by the
  // explicit int_arm_neon_vqrdmlah patterns that follow, whose result and
  // operand vector types are all the same width.
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
                                          (v4i16 DPR:$Vm))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
                                          (v2i32 DPR:$Vm))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
                                          (v8i16 QPR:$Vm))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
                                          (v4i32 QPR:$Vm))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  // By-lane forms. D-register lane sources pass the lane through unchanged;
  // Q-register lane sources go through the D sub-register rewrite.
  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1),
                                          (v4i16 DPR:$Vn),
                                          (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                               imm:$lane)))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1),
                                          (v2i32 DPR:$Vn),
                                          (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                               imm:$lane)))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1),
                                          (v8i16 QPR:$src2),
                                          (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                               imm:$lane)))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1),
                                          (v4i32 QPR:$src2),
                                          (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                               imm:$lane)))),
            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;

  // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
  //            Returning High Half
  // Mirrors VQRDMLAH above, selected from int_arm_neon_vqrdmlsh.
  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                             null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
                                          (v4i16 DPR:$Vm))),
            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
                                          (v2i32 DPR:$Vm))),
            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
                                          (v8i16 QPR:$Vm))),
            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
                                          (v4i32 QPR:$Vm))),
            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                                  null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1),
                                          (v4i16 DPR:$Vn),
                                          (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                               imm:$lane)))),
            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1),
                                          (v2i32 DPR:$Vn),
                                          (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                               imm:$lane)))),
            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1),
                                          (v8i16 QPR:$src2),
                                          (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                               imm:$lane)))),
            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1),
                                          (v4i32 QPR:$src2),
                                          (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                               imm:$lane)))),
            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;
}

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Defined with null_frag; selected below as saddsat of a vqdmull result.
defm VQDMLAL   : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vqdmlal", "s", null_frag>;
defm VQDMLALsl : N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

let Predicates = [HasNEON] in {
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                     imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                     imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// VMLS : Vector Multiply Subtract (integer and floating-point)
// Predicate gating follows VMLA: UseFPVMLx for floating point, plus
// HasFullFP16 for the f16 forms.
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
                          v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
                          v8f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
                              v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
                              v8f16, v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Integer sub-of-multiply with a lane duplicated out of a Q register.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

// f32 counterpart of the two patterns above, with its own predicate list.
def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", ARMvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// Defined with null_frag; selected below as ssubsat of a vqdmull result.
defm VQDMLSL   : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vqdmlsl", "s", null_frag>;
defm VQDMLSLsl : N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

let Predicates = [HasNEON] in {
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// f32 forms require HasVFP4, f16 forms require HasFullFP16; all are gated on
// UseFusedMAC.
def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasVFP4, UseFusedMAC]>;

def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasVFP4, UseFusedMAC]>;
def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
                          v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFusedMAC]>;

def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
                          v8f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, HasVFP4, UseFusedMAC]>;
def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, HasVFP4, UseFusedMAC]>;
def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
                          v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFusedMAC]>;
def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
                          v8f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
// fma(Vn, Vm, src1) selects VFMA with the addend $src1 as the first
// (accumulator) operand. These patterns are not gated on UseFusedMAC.
def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON, HasFullFP16]>;
def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON, HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON, HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON, HasVFP4]>;
// A negated first multiplicand selects VFMS instead (f32 only here).
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON, HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON, HasVFP4]>;

// ARMv8.2a dot product instructions.
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
// encodings are the same and thus no further bit twiddling is necessary
// in the disassembler.
// Three-register dot product. The accumulator input $Vd is tied to the
// result $dst, and the selection pattern is built directly from OpNode.
class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
           string AsmTy, ValueType AccumTy, ValueType InputTy,
           SDPatternOperator OpNode> :
      N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
            Asm, AsmTy,
            [(set (AccumTy RegTy:$dst),
                  (OpNode (AccumTy RegTy:$Vd),
                          (InputTy RegTy:$Vn),
                          (InputTy RegTy:$Vm)))]> {
  let Predicates = [HasDotProd];
  let DecoderNamespace = "VFPV8";
  let Constraints = "$dst = $Vd";
}

def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8,  int_arm_neon_udot>;
def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8,  int_arm_neon_sdot>;
def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;

// Indexed dot product instructions:
// The instruction record carries no pattern of its own; the anonymous Pat
// matches OpNode with a lane-duplicated (bitconverted) third operand and
// supplies RHS as the instruction's $Vm operand.
multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
           ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
           dag RHS> {
  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
    bit lane;
    let Inst{5} = lane;
    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
    let Constraints = "$dst = $Vd";
    let Predicates = [HasDotProd];
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumType (OpNode (AccumType Ty:$Vd),
                       (InputType Ty:$Vn),
                       (InputType (bitconvert (AccumType
                                  (ARMvduplane (AccumType Ty:$Vm),
                                                 VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
}

defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// v8.6A matrix multiplication extension
let Predicates = [HasMatMulInt8] in {
  // Q-register matrix multiply-accumulate; $Vd is tied to $dst.
  class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
                  SDPatternOperator OpNode>
        : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
                (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
                Asm, AsmTy,
                [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
                                                (v16i8 QPR:$Vn),
                                                (v16i8 QPR:$Vm)))]> {
    let DecoderNamespace = "VFPV8";
    let Constraints = "$dst = $Vd";
  }

  // Indexed mixed-sign dot product: same shape as DOTI, with a different
  // opcode field and no itinerary.
  multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy,
                             RegisterClass RegTy, ValueType AccumTy,
                             ValueType InputTy, SDPatternOperator OpNode,
                             dag RHS> {

    def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
                   (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                   N3RegFrm, NoItinerary, Asm, AsmTy, []> {
      bit lane;
      let Inst{5} = lane;
      let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
      let DecoderNamespace = "VFPV8";
      let Constraints = "$dst = $Vd";
    }

    def : Pat<
      (AccumTy (OpNode (AccumTy RegTy:$Vd),
                       (InputTy RegTy:$Vn),
                       (InputTy (bitconvert (AccumTy
                                  (ARMvduplane (AccumTy RegTy:$Vm),
                                                 VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;

  }

  // vsudot lane form: the inherited pattern is disabled (null_frag) and
  // replaced by one matching int_arm_neon_usdot with the lane-duplicated
  // value as its first multiplicand and $Vn as its second.
  multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy,
                       ValueType InputTy, dag RHS>
        : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy,
                          null_frag, null_frag> {
    def : Pat<
      (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
                                   (InputTy (bitconvert (AccumTy
                                              (ARMvduplane (AccumTy RegTy:$Vm),
                                                             VectorIndex32:$lane)))),
                                   (InputTy RegTy:$Vn))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
  }

  def VSMMLA  : N3VMatMul<0, 0, "vsmmla",  "s8", int_arm_neon_smmla>;
  def VUMMLA  : N3VMatMul<0, 1, "vummla",  "u8", int_arm_neon_ummla>;
  def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
  def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8,  int_arm_neon_usdot>;
  def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>;

  defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
                                  int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
  defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
                                  int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
  defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
}

// ARMv8.3 complex operations
// Tied form: $src1 is tied to $Vd and a 2-bit rotation goes in Inst{24-23}.
class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
                            InstrItinClass itin, dag oops, dag iops,
                            string opc, string dt, list<dag> pattern>
  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
  bits<2> rot;
  let Inst{24-23} = rot;
}

// Untied form with a 1-bit rotation in Inst{24}.
class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
                           InstrItinClass itin, dag oops, dag iops, string opc,
                           string dt, list<dag> pattern>
  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
  bits<1> rot;
  let Inst{24} = rot;
}

// Tied by-lane form: rotation in Inst{21-20}, lane bit in Inst{5}.
class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = lane;
}

// Tied by-lane form where Inst{5} holds Vm{4} rather than a lane bit.
class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = Vm{4};
  // This is needed because the lane operand does not have any bits in the
  // encoding (it only has one possible value), so we need to manually set it
  // to its default value.
  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
}

// f16 variants additionally require HasFullFP16.
multiclass N3VCP8ComplexTied<bit op21, bit op4,
                             string OpcodeStr> {
  let Predicates = [HasNEON, HasV8_3a, HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON, HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  }
}

multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
                            string OpcodeStr> {
  let Predicates = [HasNEON, HasV8_3a, HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON, HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  }
}

// These instructions index by pairs of lanes, so the VectorIndexes are twice
// as wide as the data types.
multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr> {
  let Predicates = [HasNEON, HasV8_3a, HasFullFP16] in {
  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                      VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
                      VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON, HasV8_3a] in {
  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                      complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                      complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  }
}

defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla">;
defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd">;
defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla">;

// vcadd intrinsics: rot90 selects rotation operand 0, rot270 selects 1.
let Predicates = [HasNEON, HasV8_3a, HasFullFP16] in {
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
}
let Predicates = [HasNEON, HasV8_3a] in {
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
}

// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
// The f16 forms additionally require HasFullFP16.
defm VSUB    : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                        "vsub", "i", sub, 0>;
def  VSUBfd  : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                    v2f32, v2f32, fsub, 0>;
def  VSUBfq  : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                    v4f32, v4f32, fsub, 0>;
def  VSUBhd  : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
                    v4f16, v4f16, fsub, 0>,
               Requires<[HasNEON, HasFullFP16]>;
def  VSUBhq  : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
                    v8f16, v8f16, fsub, 0>,
               Requires<[HasNEON, HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs  : N3VLExt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                           "vsubl", "s", sub, sext, 0>;
defm VSUBLu  : N3VLExt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                           "vsubl", "u", sub, zanyext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs  : N3VW_QHS<0, 1, 0b0011, 0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu  : N3VW_QHS<1, 1, 0b0011, 0, "vsubw", "u", sub, zanyext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs  : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu  : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs  : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vqsub", "s", ssubsat, 0>;
defm VQSUBu  : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vqsub", "u", usubsat, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// Defined with null_frag, so this record carries no selection pattern.
defm VSUBHN  : N3VNInt_HSD<0, 1, 0b0110, 0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1, 1, 0b0110, 0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// Select VSUBHN from a truncated unsigned right shift of a subtraction, where
// the shift amount equals the narrowed element width (8, 16 or 32).
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Comparisons.

// VCEQ : Vector Compare Equal
// Floating-point compares produce integer vectors of matching element width;
// the f16 forms additionally require HasFullFP16.
defm VCEQ    : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                           IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>;
def  VCEQfd  : N3VD_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                        ARMCCeq, 1>;
def  VCEQfq  : N3VQ_cmp<0, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                        ARMCCeq, 1>;
def  VCEQhd  : N3VD_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
                        ARMCCeq, 1>,
               Requires<[HasNEON, HasFullFP16]>;
def  VCEQhq  : N3VQ_cmp<0, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
                        ARMCCeq, 1>,
               Requires<[HasNEON, HasFullFP16]>;

// Compare-against-zero form ("#0" is part of the operand string).
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz   : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                           "$Vd, $Vm, #0", ARMCCeq>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs   : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                           IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>;
defm VCGEu   : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                           IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>;
def  VCGEfd  : N3VD_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                        ARMCCge, 0>;
def  VCGEfq  : N3VQ_cmp<1, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                        ARMCCge, 0>;
def  VCGEhd  : N3VD_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
                        ARMCCge, 0>,
               Requires<[HasNEON, HasFullFP16]>;
def  VCGEhq  : N3VQ_cmp<1, 0, 0b01, 0b1110, 0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
                        ARMCCge, 0>,
               Requires<[HasNEON, HasFullFP16]>;

// Zero-compare forms; VCLEz shares the layout with a different opcode field
// and condition code.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz   : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                           "$Vd, $Vm, #0", ARMCCge>;
defm VCLEz   : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                           "$Vd, $Vm, #0", ARMCCle>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs   : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                           IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
defm VCGTu   : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                           IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
def  VCGTfd  : N3VD_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                        ARMCCgt, 0>;
def  VCGTfq  : N3VQ_cmp<1, 0, 0b10, 0b1110, 0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                        ARMCCgt, 0>;
def  VCGThd  : N3VD_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
                        ARMCCgt, 0>,
               Requires<[HasNEON, HasFullFP16]>;
def  VCGThq  : N3VQ_cmp<1, 0, 0b11, 0b1110, 0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
                        ARMCCgt, 0>,
               Requires<[HasNEON, HasFullFP16]>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz   : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                           "$Vd, $Vm, #0", ARMCCgt>;
defm VCLTz   : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                           "$Vd, $Vm, #0", ARMCClt>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                       "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def  VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                       "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
def  VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                       "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
               Requires<[HasNEON, HasFullFP16]>;
def  VACGEhq  : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;

//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTfd  : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def  VACGTfq  : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def  VACGThd  : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VACGThq  : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;

//   VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vtst", "", NEONvtst, 1>;

// Three-operand VACLT / VACLE assembler aliases: they map onto VACGT / VACGE
// with the $Vn and $Vm source operands exchanged.
def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                    (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                    (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                    (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                    (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                      (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                      (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                      (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                      (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// +fp16fml Floating Point Multiplication Variants
let Predicates = [HasNEON, HasFP16FML], DecoderNamespace = "VFPV8" in {

// Register-register form built on N3VCP8 (third template argument fixed to 1).
class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8<op1, op2, 1, op3,
           (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
           asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Register-register form built on N3VCP8Q0 (third template argument fixed
// to 0).
class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8Q0<op1, op2, 0, op3,
             (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
             asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// By-lane form with operands: Vd, Vs, Vs[0-15], Idx[0-1]
class VFMD<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 0, 1,
               (outs DPR:$Vd),
               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  // Single-bit lane index, placed in Inst{3}.
  bit idx;
  let Inst{3}     = idx;
  // Explicit placement of the $Vn and $Vm register-number bits.
  let Inst{19-16} = Vn{4-1};
  let Inst{7}     = Vn{0};
  let Inst{5}     = Vm{0};
  let Inst{2-0}   = Vm{3-1};
}

// By-lane form with operands: Vq, Vd, Vd[0-7], Idx[0-3]
class VFMQ<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 1, 1,
               (outs QPR:$Vd),
               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  // Two-bit lane index, split across Inst{5} (high) and Inst{3} (low).
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
}

//                                                op1   op2   op3
def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
} // HasNEON, HasFP16FML


// Two-operand VACLT aliases: $Vd is written and is also passed as the last
// source operand of VACGT.
def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                    (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def : NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                    (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
// Two-operand VACLE aliases: $Vd is written and is also passed as the last
// source operand of VACGE.
def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                    (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def : NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                    (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
// Two-operand f16 VACLT / VACLE aliases (need HasFullFP16).
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                      (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                      (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                      (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
  def : NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                      (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
}

//===----------------------------------------------------------------------===//
// Vector Bitwise Operations.
//===----------------------------------------------------------------------===//

// Bitwise NOT expressed as XOR with the all-ones vector (D and Q flavours).
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesD)>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesV)>;


//   VAND     : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

//   VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

//   VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// Maps OpNodeD / OpNodeQ on the remaining integer vector types (v8i8, v4i16,
// v1i64 for D; v16i8, v8i16, v2i64 for Q) onto the instructions named
// Name#"d" and Name#"q".
multiclass BitwisePatterns<string Name, SDPatternOperator OpNodeD,
                           SDPatternOperator OpNodeQ> {
  // 64-bit (D register) types.
  def : Pat<(v8i8  (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
  def : Pat<(v4i16 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
  def : Pat<(v1i64 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;

  // 128-bit (Q register) types.
  def : Pat<(v16i8 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  def : Pat<(v8i16 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  def : Pat<(v2i64 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
}

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VAND", and, and>;
  defm : BitwisePatterns<"VORR", or, or>;
  defm : BitwisePatterns<"VEOR", xor, xor>;
}

// VORR with a modified immediate.  $src is tied to $Vd.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd),
                          (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd),
                          (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
}

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VBIC",
                         BinOpFrag<(and node:$LHS, (vnotd node:$RHS))>,
                         BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>>;
}

// VBIC with a modified immediate.  $src is tied to $Vd.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd),
                          (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd),
                          (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

//   VORN     : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VORN",
                         BinOpFrag<(or node:$LHS, (vnotd node:$RHS))>,
                         BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>>;
}

//   VMVN     : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                         (outs DPR:$Vd), (ins nImmSplatI16:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                         (outs QPR:$Vd), (ins nImmSplatI16:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                         (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
                         (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

//   VMVN     : Vector Bitwise NOT
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// The same instructions cover the remaining integer vector types.
let Predicates = [HasNEON] in {
  def : Pat<(v1i64 (vnotd DPR:$src)),
            (VMVNd DPR:$src)>;
  def : Pat<(v4i16 (vnotd DPR:$src)),
            (VMVNd DPR:$src)>;
  def : Pat<(v8i8  (vnotd DPR:$src)),
            (VMVNd DPR:$src)>;
  def : Pat<(v2i64 (vnotq QPR:$src)),
            (VMVNq QPR:$src)>;
  def : Pat<(v8i16 (vnotq QPR:$src)),
            (VMVNq QPR:$src)>;
  def : Pat<(v16i8 (vnotq QPR:$src)),
            (VMVNq QPR:$src)>;
}

// The TwoAddress pass will not go looking for equivalent operations
// with different register constraints; it just inserts copies.
// That is why the VBSP pseudo is implemented.  It is expanded later into
// VBIT/VBIF/VBSL, taking register constraints into account to avoid copies.
def  VBSPd
  : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                IIC_VBINiD, "",
                [(set DPR:$Vd,
                      (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
  // The vbsl intrinsic on every 64-bit vector type selects the pseudo.
  def : Pat<(v8i8  (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                      (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
            (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                      (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
            (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                      (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
            (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                      (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
            (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                      (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
            (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

  // The open-coded select (Vn & Vd) | (Vm & ~Vd) selects the pseudo as well.
  def : Pat<(v8i8  (or (and DPR:$Vn, DPR:$Vd),
                       (and DPR:$Vm, (vnotd DPR:$Vd)))),
            (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
  def : Pat<(v4i16 (or (and DPR:$Vn, DPR:$Vd),
                       (and DPR:$Vm, (vnotd DPR:$Vd)))),
            (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
  def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                       (and DPR:$Vm, (vnotd DPR:$Vd)))),
            (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
  def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                       (and DPR:$Vm, (vnotd DPR:$Vd)))),
            (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}

def  VBSPq
  : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                IIC_VBINiQ, "",
                [(set QPR:$Vd,
                      (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
  // The vbsl intrinsic on every 128-bit vector type selects the pseudo.
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                      (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
            (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                      (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
            (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                      (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
            (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                      (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
            (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                      (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
            (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

  // The open-coded select (Vn & Vd) | (Vm & ~Vd) selects the pseudo as well.
  def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd),
                       (and QPR:$Vm, (vnotq QPR:$Vd)))),
            (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
  def : Pat<(v8i16 (or (and QPR:$Vn, QPR:$Vd),
                       (and QPR:$Vm, (vnotq QPR:$Vd)))),
            (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
  def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                       (and QPR:$Vm, (vnotq QPR:$Vd)))),
            (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
  def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                       (and QPR:$Vm, (vnotq QPR:$Vd)))),
            (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}

//   VBSL     : Vector Bitwise Select
//   No selection pattern is attached; $src1 is tied to $Vd.
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

//   VBIF     : Vector Bitwise Insert if False
//   Declared with the same operand list, operand string and "$src1 = $Vd"
//   tie as VBSL; only the encoding field and the mnemonic differ.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

//   VBIT     : Vector Bitwise Insert if True
//   Declared with the same operand list, operand string and "$src1 = $Vd"
//   tie as VBSL; only the encoding field and the mnemonic differ.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

//===----------------------------------------------------------------------===//
// Vector Absolute Differences.
//===----------------------------------------------------------------------===//

//   VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;

//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
//   Both the signed and the unsigned form pass zext as the extension operator.
defm VABDLs   : N3VLIntExt_QHS<0, 1, 0b0111, 0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1, 1, 0b0111, 0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// abs(zext(a) - zext(b)) selects the unsigned VABDL.
let Predicates = [HasNEON] in {
  def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)),
                             (zext (v8i8 DPR:$opB))))),
            (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
  def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)),
                             (zext (v4i16 DPR:$opB))))),
            (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

// Arithmetic shift right, by $shift, of (zext $in1 - zext $in2).
def abd_shr :
  PatFrag<(ops node:$in1, node:$in2, node:$shift),
          (ARMvshrsImm (sub (zext node:$in1),
                            (zext node:$in2)), (i32 $shift))>;

let Predicates = [HasNEON] in {
  def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
                 (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                  (zext (v2i32 DPR:$opB))),
                             (abd_shr (v2i32 DPR:$opA),
                                      (v2i32 DPR:$opB), 63)))),
            (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
}

//   VABA     : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0, 1, 0b0101, 0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1, 1, 0b0101, 0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

//   VMAX     : Vector Maximum
//   Integer forms select smax / umax; floating-point forms select fmaximum.
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", smax, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", umax, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, fmaximum, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, fmaximum, 1>;
def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f16",
                        v4f16, v4f16, fmaximum, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f16",
                        v8f16, v8f16, fmaximum, 1>,
                Requires<[HasNEON, HasFullFP16]>;

//   VMAXNM   : selects fmaxnum; requires HasFPARMv8 (plus HasFullFP16 for
//              the f16 forms).
let PostEncoderMethod = "NEONThumb2V8PostEncoder",
    DecoderNamespace = "v8NEON" in {
  def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v2f32, v2f32, fmaxnum, 1>,
                       Requires<[HasFPARMv8, HasNEON]>;
  def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v4f32, v4f32, fmaxnum, 1>,
                       Requires<[HasFPARMv8, HasNEON]>;
  def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v4f16, v4f16, fmaxnum, 1>,
                       Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
  def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v8f16, v8f16, fmaxnum, 1>,
                       Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
}

//   VMIN     : Vector Minimum
//   Integer forms select smin / umin; floating-point forms select fminimum.
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", smin, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", umin, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, fminimum, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, fminimum, 1>;
def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f16",
                        v4f16, v4f16, fminimum, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f16",
                        v8f16, v8f16, fminimum, 1>,
                Requires<[HasNEON, HasFullFP16]>;

//   VMINNM   : selects fminnum; requires HasFPARMv8 (plus HasFullFP16 for
//              the f16 forms).
let PostEncoderMethod = "NEONThumb2V8PostEncoder",
    DecoderNamespace = "v8NEON" in {
  def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v2f32, v2f32, fminnum, 1>,
                       Requires<[HasFPARMv8, HasNEON]>;
  def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v4f32, v4f32, fminnum, 1>,
                       Requires<[HasFPARMv8, HasNEON]>;
  def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v4f16, v4f16, fminnum, 1>,
                       Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
  def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v8f16, v8f16, fminnum, 1>,
                       Requires<[HasFPARMv8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.

//   VPADD    : Vector Pairwise Add
//   Only D-register forms are defined here; the f16 form needs HasFullFP16.
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f16",
                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
                Requires<[HasNEON, HasFullFP16]>;

//   VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

//   VPADAL   : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

//   VPMAX    : Vector Pairwise Maximum
//   The floating-point forms use the same int_arm_neon_vpmaxs intrinsic as
//   the signed integer forms.
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

//   VPMIN    : Vector Pairwise Minimum
//   The floating-point forms use the same int_arm_neon_vpmins intrinsic as
//   the signed integer forms.
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

//   VRECPE   : Vector Reciprocal Estimate
//   u32, f32 and (with HasFullFP16) f16 forms, all on int_arm_neon_vrecpe.
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;
def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f16",
                        v4f16, v4f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f16",
                        v8f16, v8f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;

//   VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f16",
                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f16",
                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;

//   VRSQRTE  : Vector Reciprocal Square Root Estimate
//   u32, f32 and (with HasFullFP16) f16 forms, all on int_arm_neon_vrsqrte.
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;

//   VRSQRTS  : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.
// VSHL     : Vector Shift
// Register-shift forms. The signed and unsigned variants differ in the first
// template bit and in the intrinsic they select.
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// Also select the VSHLs*/VSHLu* instructions for the ARMvshls / ARMvshlu
// nodes: one pattern per DPR vector type and one per QPR vector type.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL     : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",
                            ARMvshrsImm>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",
                            ARMvshruImm>;

// VSHLL    : Vector Shift Left Long
// The operator is an inline PatFrag: ARMvshlImm applied to the sign-extended
// (VSHLLs) or zero-extended (VSHLLu) source.
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL    : Vector Shift Left Long (with maximum shift count)
// N2VLShMax derives from N2VLSh with null_frag as the operator (the selection
// patterns are written out explicitly below), fixes Inst{21-16} to op21_16
// and sets the decoder method to DecodeVSHLMaxInstruction.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// A left shift by exactly the source element width (8, 16 or 32) of a
// zero-, sign- or any-extended source selects the same VSHLLi* instruction.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN    : Vector Shift Right and Narrow
// The multiclass is instantiated with the signed shift (ARMvshrsImm) under a
// trunc; the explicit patterns that follow map the unsigned shift
// (ARMvshruImm) under a trunc onto the same VSHRN instructions.
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           PatFrag<(ops node:$Rn, node:$amt),
                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

// VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",
                            NEONvrshrsImm>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",
                            NEONvrshruImm>;

// VRSHRN   : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrnImm>;

// VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;

// VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;

// VQSHRN   : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrnsImm>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnuImm>;

// VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsuImm>;

// VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnuImm>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsuImm>;

// VSRA     : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA    : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;

// VSLI     : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI     : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS     : Vector Absolute Value
// Integer forms select `abs`; the floating-point forms below select `fabs`,
// with the f16 variants additionally requiring HasFullFP16.
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                      v4f32, v4f32, fabs>;
def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v4f16, v4f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                      v8f16, v8f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;

// VQABS    : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.
// Integer negation is matched as a subtraction from the all-zeros vector:
// vnegd uses ARMimmAllZerosD, vnegq uses ARMimmAllZerosV.
def vnegd  : PatFrag<(ops node:$in),
                     (sub ARMimmAllZerosD, node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub ARMimmAllZerosV, node:$in)>;

// VNEGD / VNEGQ: integer negate on a DPR / QPR register, parameterised by the
// 2-bit `size` field, the mnemonic, the data-type suffix and the vector type.
// The two classes differ in register class, itinerary, the PatFrag used
// (vnegd vs vnegq) and in one template bit (0 vs 1).
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG     : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG     : Vector Negate (floating-point)
// These select `fneg` directly; the f16 variants additionally require
// HasFullFP16.
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;
def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;

// Explicit selection patterns for the integer negate PatFrags, one per
// vector type.
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
}

// VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS     : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ     : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT     : Vector Count One Bits
// Only byte-element forms are defined here (v8i8 and v16i8), selecting ctpop.
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are outputs, each tied to one of the two inputs through the
// constraint string. No selection pattern is attached (empty pattern list).
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.
// VMOV     : Vector Move (Register)
// Assembly-only aliases: "vmov Vd, Vm" is emitted as VORR with $Vm used for
// both source operands.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV     : Vector Move (Immediate)

// Although VMOVs are not, strictly speaking, cheap, they are as expensive
// as their copy counterparts (VORR), so we should prefer rematerialization
// over splitting when it applies.
let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;

// For the i16 forms the 4-bit template field {1,0,?,0} leaves one bit
// unspecified; it is supplied from the immediate through Inst{9} = SIMM{9}.
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// For the i32 forms all four bits of that field are unspecified and are
// supplied from the immediate through Inst{11-8} = SIMM{11-8}.
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;

// Floating-point immediate forms select ARMvmovFPImm rather than ARMvmovImm.
def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Add support for the byte-replication feature, for GAS compatibility.
// `To` is the element type named in the assembly suffix (".i<To.Size>"); the
// alias is emitted as the i8 VMOV instruction.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are incorrect, but we could deal with such cases.
  // For last two instructions, for example, it should emit:
  // "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // Also add same support for VMVN instructions. So instruction:
  // "vmvn.i32 d0, #0xabababab"
  // actually means:
  // "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// Similar to above for types other than i8, e.g.:
// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In this case we do not canonicalize VMVN to VMOV
//
// V8/V16 are the D/Q VMOV instructions and NV8/NV16 the D/Q VMVN instructions
// for the narrower element type `From`; `To` is the wider element type named
// in the assembly suffix. All four aliases use the nImmVMOVIReplicate operand.
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                      VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                      VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                      VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
// NOTE(review): ~0xaaffaaff is 0x55005500, so the second example's result
// looks like it should read "vmov.i16 d0, #0x5500" -- confirm before relying
// on it.

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

// Both pseudos take no inputs, match the all-zeros vector, expand to the
// corresponding VMOV i32 instruction with immediate 0, and require HasZCZ.
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}

//   VMOV     : Vector Get Lane (move scalar to ARM core register)

// The `?` bits in the template fields are the lane index; each def fills them
// in explicitly from $lane (3 bits for 8-bit lanes, 2 bits for 16-bit lanes,
// 1 bit for 32-bit lanes).
def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
// The 32-bit form selects plain extractelt and is only used when
// HasFastVGETLNi32 holds; the HasSlowVGETLNi32 patterns further below use a
// subregister copy instead.
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                           imm:$lane))]>,
                Requires<[HasFPRegs, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// VGETLNi32 is also legal as just vmov r0,d0[0] without the .32 suffix
def : InstAlias<"vmov${p} $R, $V$lane",
                (VGETLNi32 GPR:$R, DPR:$V, VectorIndex32:$lane, pred:$p), 0>,
      Requires<VGETLNi32.Predicates>;
let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register sources: EXTRACT_SUBREG picks a D subregister using the
// DSubReg_*_reg transform of the lane immediate, and the lane operand is
// rewritten with the SubReg_*_lane transform (both defined elsewhere;
// presumably "which D half" and "lane within that half" -- confirm).
def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
// f16 and bf16 vectors reuse the unsigned 16-bit lane move.
def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
          (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;
def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
          (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
}
// 32-bit integer lane extraction: use VGETLNi32 when HasFastVGETLNi32,
// otherwise extract the S subregister and copy it to the GPR class.
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
// Floating-point lane extraction is a plain subregister extract; the f32
// cases first copy the source into the *_VFP2 register class.
let Predicates = [HasNEON] in {
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
}

// Even 16-bit float lanes (imm_even): extract the S subregister selected by
// SSubReg_f16_reg from the source copied into the *_VFP2 register class.
// VT4 is the DPR vector type and VT8 the QPR vector type.
multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
              (EXTRACT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                  (SSubReg_f16_reg imm_even:$lane))>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
              (EXTRACT_SUBREG
                  (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                  (SSubReg_f16_reg imm_even:$lane))>;
}

// Odd 16-bit float lanes (imm_odd): same subregister extract as the even
// case, then VMOVH on the result and a copy into the HPR class.
multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                        (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                        (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                        (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                        (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
}

let Predicates = [HasNEON] in {
  defm : ExtractEltEvenF16<v4f16, v8f16>;
  defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
}

// AddedComplexity = 1 gives these patterns priority over the GPR-based bf16
// odd-lane patterns defined below when HasFullFP16 is also available.
let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
  // If VMOVH (vmovx.f16) is available use it to extract BF16 from the odd lanes
  defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
}

let Predicates = [HasBF16, HasNEON] in {
  defm : ExtractEltEvenF16<v4bf16, v8bf16>;

  // Otherwise, if VMOVH is not available resort to extracting the odd lane
  // into a GPR and then moving to HPR
  def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
              HPR)>;

  def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane)),
              HPR)>;
}

//   VMOV     : Vector Set Lane (move ARM core register to scalar)

// The destination register is tied to $src1: only the selected lane is
// replaced. Lane-index bits are encoded as in the get-lane definitions.
let Constraints = "$src1 = $V" in {
def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                           GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent to
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}
// VSETLNi32 is also legal as just vmov d0[0],r0 without the .32 suffix
def : InstAlias<"vmov${p} $V$lane, $R",
                (VSETLNi32 DPR:$V, GPR:$R, VectorIndex32:$lane, pred:$p), 0>,
      Requires<VSETLNi32.Predicates>;

// TODO: for odd lanes we could optimize this a bit by using the VINS
// FullFP16 instruction when it is available
//
// Insert a 16-bit float scalar (type VTScalar, held in HPR) into a DPR vector
// (VT4) or QPR vector (VT8): the scalar is copied to the GPR class and
// inserted with VSETLNi16. The QPR case operates on the D subregister picked
// by DSubReg_i16_reg and writes it back with INSERT_SUBREG.
multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
  def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT4 (VSETLNi16 DPR:$src1,
                   (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
  def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT8 (INSERT_SUBREG QPR:$src1,
                    (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                        (DSubReg_i16_reg imm:$lane))),
                              (COPY_TO_REGCLASS HPR:$src2, GPR),
                              (SubReg_i16_lane imm:$lane))),
                    (DSubReg_i16_reg imm:$lane)))>;
}

let Predicates = [HasNEON] in {
// Integer lane insertion into a Q register: extract the D subregister, set
// the lane in it, and insert the D subregister back.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

// f32 lane insertion is a direct S-subregister insert on the source copied
// into the *_VFP2 register class.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

defm : InsertEltF16<f16, v4f16, v8f16>;

def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: floating-point scalars are inserted as subregister 0 of
// an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;

// scalar_to_vector: integer scalars are written to lane 0 of an IMPLICIT_DEF
// D register with VSETLN; the Q-register forms additionally place that D
// register into dsub_0 of an IMPLICIT_DEF Q register.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
}

let Predicates = [HasNEON, HasBF16] in
defm : InsertEltF16<bf16, v4bf16, v8bf16>;

//   VDUP     : Vector Duplicate (from ARM core register to all elements)

// VDUPD / VDUPQ: duplicate a GPR into every element of a DPR / QPR vector of
// type Ty, selecting ARMvdup of an i32 source.
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

// Of these, only VDUP32d carries the extra HasFastVDUP32 requirement.
def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// ARMvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
      Requires<[HasNEON]>;

// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// When HasSlowVDUP32 holds, a 2 x 32-bit duplicate of a GPR is formed with
// VMOVDRR, passing the same GPR for both operands.
def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

//   VDUP     : Vector Duplicate Lane (from scalar to all elements)

// VDUPLND: duplicate lane $lane of a DPR vector of type Ty into every element
// of a DPR result of the same type. IdxTy is the lane-index operand class.
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

// VDUPLNQ: duplicate lane $lane of a DPR vector of type OpTy into every
// element of a QPR result of type ResTy.
// NOTE(review): the selection pattern matches the lane as
// VectorIndex32:$lane regardless of IdxTy, whereas VDUPLND uses imm:$lane --
// confirm this asymmetry is intended.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
// In each definition the fixed low bits of op19_16 identify the element size
// and the `?` high bits are filled from the lane index: 3 bits for 8-bit
// elements, 2 bits for 16-bit elements, 1 bit for 32-bit elements.
def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

let Predicates = [HasNEON] in {
// Floating-point vector types reuse the integer VDUPLN instructions of the
// same element width.
def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;

def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Q-register sources: the instruction takes a D-register source, so the
// D subregister is extracted with the DSubReg_*_reg transform and the lane
// operand is rewritten with the SubReg_*_lane transform.
def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// ARMvdup of a floating-point scalar register: insert the scalar as ssub_0 of
// an IMPLICIT_DEF D register, then duplicate lane 0.
def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             (f16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             (f16 HPR:$src), ssub_0), (i32 0)))>;
}

// The same duplicate-lane and duplicate-scalar patterns for bf16 vectors,
// additionally gated on HasBF16.
let Predicates = [HasNEON, HasBF16] in {
def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;

def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
          (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
          (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                             (bf16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
          (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                             (bf16 HPR:$src), ssub_0), (i32 0)))>;
}

//   VMOVN    : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
//   VQMOVN   : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
//   VMOVL    : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

// anyext is selected as the unsigned (zero-extending) lengthening move.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
}

// Vector Conversions.

//   VCVT     : Vector Convert Between Floating-Point and Integers
// D-register f32 <-> i32 conversions; the four variants differ in the fifth
// template argument and in the node they select.
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

// Q-register f32 <-> i32 conversions.
def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// D-register f16 <-> i16 conversions, additionally gated on HasFullFP16.
def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                     v4i16, v4f16, fp_to_sint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                     v4i16, v4f16, fp_to_uint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                     v4f16, v4i16, sint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                     v4f16, v4i16, uint_to_fp>,
                Requires<[HasNEON,
HasFullFP16]>; 6821 6822def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6823 v8i16, v8f16, fp_to_sint>, 6824 Requires<[HasNEON, HasFullFP16]>; 6825def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6826 v8i16, v8f16, fp_to_uint>, 6827 Requires<[HasNEON, HasFullFP16]>; 6828def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6829 v8f16, v8i16, sint_to_fp>, 6830 Requires<[HasNEON, HasFullFP16]>; 6831def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6832 v8f16, v8i16, uint_to_fp>, 6833 Requires<[HasNEON, HasFullFP16]>; 6834 6835// VCVT{A, N, P, M} 6836multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, 6837 SDPatternOperator IntU> { 6838 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 6839 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6840 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; 6841 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6842 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; 6843 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6844 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; 6845 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6846 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; 6847 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6848 "s16.f16", v4i16, v4f16, IntS>, 6849 Requires<[HasV8, HasNEON, HasFullFP16]>; 6850 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6851 "s16.f16", v8i16, v8f16, IntS>, 6852 Requires<[HasV8, HasNEON, HasFullFP16]>; 6853 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6854 "u16.f16", v4i16, v4f16, IntU>, 6855 Requires<[HasV8, HasNEON, HasFullFP16]>; 6856 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, 
NoItinerary, !strconcat("vcvt", op), 6857 "u16.f16", v8i16, v8f16, IntU>, 6858 Requires<[HasV8, HasNEON, HasFullFP16]>; 6859 } 6860} 6861 6862defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; 6863defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; 6864defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; 6865defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; 6866 6867// VCVT : Vector Convert Between Floating-Point and Fixed-Point. 6868let DecoderMethod = "DecodeVCVTD" in { 6869def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6870 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; 6871def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6872 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; 6873def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6874 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; 6875def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 6876 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; 6877let Predicates = [HasNEON, HasFullFP16] in { 6878def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6879 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>; 6880def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6881 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>; 6882def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6883 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>; 6884def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6885 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>; 6886} // Predicates = [HasNEON, HasFullFP16] 6887} 6888 6889let DecoderMethod = "DecodeVCVTQ" in { 6890def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6891 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; 6892def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6893 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; 6894def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6895 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; 6896def VCVTxu2fq : N2VCvtQ<1, 1, 
0b1110, 0, 1, "vcvt", "f32.u32", 6897 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; 6898let Predicates = [HasNEON, HasFullFP16] in { 6899def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6900 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>; 6901def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6902 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>; 6903def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6904 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>; 6905def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6906 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>; 6907} // Predicates = [HasNEON, HasFullFP16] 6908} 6909 6910def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", 6911 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6912def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", 6913 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6914def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", 6915 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6916def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", 6917 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6918 6919def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", 6920 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6921def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", 6922 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6923def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", 6924 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6925def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", 6926 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6927 6928def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0", 6929 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6930def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0", 6931 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6932def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0", 6933 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6934def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0", 6935 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6936 6937def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0", 6938 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6939def : 
NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", 6940 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6941def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0", 6942 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6943def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0", 6944 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6945 6946 6947// VCVT : Vector Convert Between Half-Precision and Single-Precision. 6948def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 6949 IIC_VUNAQ, "vcvt", "f16.f32", 6950 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 6951 Requires<[HasNEON, HasFP16]>; 6952def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 6953 IIC_VUNAQ, "vcvt", "f32.f16", 6954 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 6955 Requires<[HasNEON, HasFP16]>; 6956 6957def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>; 6958def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>; 6959 6960// Vector Reverse. 6961 6962// VREV64 : Vector Reverse elements within 64-bit doublewords 6963 6964class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6965 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), 6966 (ins DPR:$Vm), IIC_VMOVD, 6967 OpcodeStr, Dt, "$Vd, $Vm", "", 6968 [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>; 6969class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6970 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), 6971 (ins QPR:$Vm), IIC_VMOVQ, 6972 OpcodeStr, Dt, "$Vd, $Vm", "", 6973 [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>; 6974 6975def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; 6976def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; 6977def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; 6978let Predicates = [HasNEON] in { 6979def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; 6980} 6981 6982def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; 6983def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; 6984def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; 6985 
// Floating-point and bf16 VREV64 patterns select the integer instruction of
// the same element width.
let Predicates = [HasNEON] in {
  def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
  def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
  def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
  def : Pat<(v8bf16 (ARMvrev64 (v8bf16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
  def : Pat<(v4bf16 (ARMvrev64 (v4bf16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
}

//   VREV32   : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// f16 and bf16 VREV32 patterns select the 16-bit integer instructions.
let Predicates = [HasNEON] in {
  def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))), (VREV32q16 QPR:$Vm)>;
  def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))), (VREV32d16 DPR:$Vm)>;
  def : Pat<(v8bf16 (ARMvrev32 (v8bf16 QPR:$Vm))), (VREV32q16 QPR:$Vm)>;
  def : Pat<(v4bf16 (ARMvrev32 (v4bf16 DPR:$Vm))), (VREV32d16 DPR:$Vm)>;
}

//   VREV16   : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

//  Aligned extractions: really just dropping registers

// Selects a vector_extract_subvec of a Q register to a plain EXTRACT_SUBREG;
// LaneCVT rewrites the start-index immediate into the subregister operand.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
    Requires<[HasNEON]>;

def : AlignedVEXTq<v8i8,   v16i8,  DSubReg_i8_reg>;
def : AlignedVEXTq<v4i16,  v8i16,  DSubReg_i16_reg>;
def : AlignedVEXTq<v2i32,  v4i32,  DSubReg_i32_reg>;
def : AlignedVEXTq<v1i64,  v2i64,  DSubReg_f64_reg>;
def : AlignedVEXTq<v2f32,  v4f32,  DSubReg_i32_reg>;
def : AlignedVEXTq<v4f16,  v8f16,  DSubReg_i16_reg>;
def : AlignedVEXTq<v4bf16, v8bf16, DSubReg_i16_reg>;


//   VEXT     : Vector Extract

// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// D-register VEXT: Inst{11} is fixed to zero and the index occupies
// Inst{10-8}; the per-size defs below override how the index is placed.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// Q-register VEXT: the index occupies Inst{11-8}.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// For element sizes wider than 8 bits the index is placed in the upper bits
// of the field and the remaining low bits are set to zero.
def VEXTd8  : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
def : Pat<(v4bf16 (NEONvext (v4bf16 DPR:$Vn), (v4bf16 DPR:$Vm),
                            (i32 imm:$index))),
          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
let Predicates = [HasNEON] in
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8  : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
def : Pat<(v8bf16 (NEONvext (v8bf16 QPR:$Vn), (v8bf16 QPR:$Vm),
                            (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
let Predicates = [HasNEON] in
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

//   VTRN     : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0,
        (outs DPR:$Vd), (ins VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

// The multi-register table forms carry no selection pattern of their own;
// they are selected via the explicit patterns further below.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0,
        (outs DPR:$Vd), (ins VecListDPair:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0,
        (outs DPR:$Vd), (ins VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0,
        (outs DPR:$Vd), (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos taking the whole table as one QQPR operand.
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB4, "", []>;

//   VTBX     : Vector Table Extension
// Each form takes an extra $orig input that is tied to the destination.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

let Predicates = [HasNEON] in {
// Two-register tables: build a DPair from the two table operands.
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

// Three-register tables: build a QQPR whose fourth D subregister is left
// undefined.
def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBX3Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;

// Four-register tables: all four D subregisters of the QQPR are supplied.
def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
}

// VRINT      : Vector Rounding
// Expands to D/Q records for f32 (suffix "f") and f16 (suffix "h"), each of
// which sets Inst{9-7} to op9_7, plus assembly aliases that accept the
// two-type ".f32.f32" / ".f16.f16" spelling.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f32",
                       v2f32, v2f32, Int>,
             Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f32",
                       v4f32, v4f32, Int>,
             Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f16",
                       v4f16, v4f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f16",
                       v8f16, v8f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<"vrint" # op # ".f32.f32\t$Dd, $Dm",
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<"vrint" # op # ".f32.f32\t$Qd, $Qm",
                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
    def : NEONInstAlias<"vrint" # op # ".f16.f16\t$Dd, $Dm",
                        (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
    def : NEONInstAlias<"vrint" # op # ".f16.f16\t$Qd, $Qm",
                        (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
// Helper classes: the mnemonic is "aes" or "sha" followed by `op`; AES forms
// use v16i8 with data type "8", SHA forms use v4i32 with data type "32".
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 "aes" # op, "8", v16i8, v16i8, Int>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  "aes" # op, "8", v16i8, v16i8, Int>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 "sha" # op, "32", v4i32, v4i32, Int>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  "sha" # op, "32", v4i32, v4i32, Int>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20,
                 SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 "sha" # op, "32", v4i32, v4i32, Int>;
}

let Predicates = [HasV8, HasAES] in {
def AESD   : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE   : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC  : AES<"mc", 1, 0, int_arm_neon_aesmc>;
}

// SHA1H, SHA1C, SHA1M and SHA1P are defined with null_frag and are selected
// by the explicit patterns that follow.
let Predicates = [HasV8, HasSHA2] in {
def SHA1H     : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1   : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C     : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M     : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P     : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0   : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H   : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2  : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
}

let Predicates = [HasNEON] in {
// The i32 operand of these intrinsics is copied into an SPR and widened with
// SUBREG_TO_REG before being handed to the instruction; for sha1h the result
// is read back from ssub_0 and copied to a GPR.
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS
            (f32 (EXTRACT_SUBREG
                   (SHA1H (SUBREG_TO_REG (i64 0),
                                         (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                         ssub_0)),
                   ssub_0)),
            GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e,
                                     v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e,
                                     v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e,
                                     v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;
}

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Unary scalar f32 operation done with a NEON D-register instruction: the
// scalar is inserted at ssub_0 of an undefined DPR_VFP2 value, the vector
// instruction is applied, and ssub_0 of the result is extracted.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
                (v2f32 (COPY_TO_REGCLASS
                  (Inst
                    (INSERT_SUBREG
                      (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      SPR:$a, ssub_0)),
                  DPR_VFP2)),
                ssub_0)>;

// Binary scalar f32 operation, same scheme with two inserted operands.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
                (v2f32 (COPY_TO_REGCLASS
                  (Inst
                    (INSERT_SUBREG
                      (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      SPR:$a, ssub_0),
                    (INSERT_SUBREG
                      (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      SPR:$b, ssub_0)),
                  DPR_VFP2)),
                ssub_0)>;

// Binary scalar f16 operation, using v4f16 vectors and HPR operands.
class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
                (v4f16 (COPY_TO_REGCLASS
                  (Inst
                    (INSERT_SUBREG
                      (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      HPR:$a, ssub_0),
                    (INSERT_SUBREG
                      (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      HPR:$b, ssub_0)),
                  DPR_VFP2)),
                ssub_0)>;

// Scalar f32 "acc OpNode (a MulNode b)" done with a three-operand NEON
// instruction.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
                (v2f32 (COPY_TO_REGCLASS
                  (Inst
                    (INSERT_SUBREG
                      (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      SPR:$acc, ssub_0),
                    (INSERT_SUBREG
                      (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      SPR:$a, ssub_0),
                    (INSERT_SUBREG
                      (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)),
                                               DPR_VFP2)),
                      SPR:$b, ssub_0)),
                  DPR_VFP2)),
                ssub_0)>;

// Scalar integer -> f32 conversion: the GPR value is first copied to an SPR.
class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;
// Scalar f32 -> integer conversion.
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
let Predicates = [HasNEON, DontUseVMOVSR] in {
  def : Pat<(f32 (bitconvert GPR:$a)),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>;
  def : Pat<(arm_vmovsr GPR:$a),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>;
}

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns or Endianness - Revert Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// Conversions between types of equal element width carry no endianness
// predicate: the result is simply the source register with the new type.
let Predicates = [HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

  foreach HalfVT = [v4f16, v4bf16] in {
    def : Pat<(v4i16  (bitconvert (HalfVT DPR:$src))), (v4i16  DPR:$src)>;
    def : Pat<(HalfVT (bitconvert (v4i16  DPR:$src))), (HalfVT DPR:$src)>;
  }

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;

  foreach HalfVT = [v8f16, v8bf16] in {
    def : Pat<(v8i16  (bitconvert (HalfVT QPR:$src))), (v8i16  QPR:$src)>;
    def : Pat<(HalfVT (bitconvert (v8i16  QPR:$src))), (HalfVT QPR:$src)>;
  }
}

// Little endian: a bitconvert between types of different element width is a
// plain retyping of the register.  Each loop body emits both directions for
// one (wider-element, narrower-element) type pair.
let Predicates = [IsLE,HasNEON] in {
  // 64 bit conversions
  foreach VT64 = [f64, v1i64] in
  foreach VTn = [v2f32, v2i32, v4f16, v4bf16, v4i16, v8i8] in {
    def : Pat<(VT64 (bitconvert (VTn  DPR:$src))), (VT64 DPR:$src)>;
    def : Pat<(VTn  (bitconvert (VT64 DPR:$src))), (VTn  DPR:$src)>;
  }
  foreach VT32 = [v2f32, v2i32] in
  foreach VTn = [v4f16, v4bf16, v4i16, v8i8] in {
    def : Pat<(VT32 (bitconvert (VTn  DPR:$src))), (VT32 DPR:$src)>;
    def : Pat<(VTn  (bitconvert (VT32 DPR:$src))), (VTn  DPR:$src)>;
  }
  foreach VT16 = [v4f16, v4bf16, v4i16] in {
    def : Pat<(VT16 (bitconvert (v8i8 DPR:$src))), (VT16 DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (VT16 DPR:$src))), (v8i8 DPR:$src)>;
  }

  // 128 bit conversions
  foreach VT64 = [v2f64, v2i64] in
  foreach VTn = [v4f32, v4i32, v8f16, v8bf16, v8i16, v16i8] in {
    def : Pat<(VT64 (bitconvert (VTn  QPR:$src))), (VT64 QPR:$src)>;
    def : Pat<(VTn  (bitconvert (VT64 QPR:$src))), (VTn  QPR:$src)>;
  }
  foreach VT32 = [v4f32, v4i32] in
  foreach VTn = [v8f16, v8bf16, v8i16, v16i8] in {
    def : Pat<(VT32 (bitconvert (VTn  QPR:$src))), (VT32 QPR:$src)>;
    def : Pat<(VTn  (bitconvert (VT32 QPR:$src))), (VTn  QPR:$src)>;
  }
  foreach VT16 = [v8f16, v8bf16, v8i16] in {
    def : Pat<(VT16  (bitconvert (v16i8 QPR:$src))), (VT16  QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (VT16  QPR:$src))), (v16i8 QPR:$src)>;
  }
}

// Big endian: a bitconvert between types of different element width selects
// VREV<W>{d,q}<N>, where W is the wider and N the narrower element width of
// the two types.  The same instruction is used for both directions.
let Predicates = [IsBE,HasNEON] in {
  // 64 bit conversions
  foreach VT64 = [f64, v1i64] in {
    foreach VT32 = [v2f32, v2i32] in {
      def : Pat<(VT64 (bitconvert (VT32 DPR:$src))), (VREV64d32 DPR:$src)>;
      def : Pat<(VT32 (bitconvert (VT64 DPR:$src))), (VREV64d32 DPR:$src)>;
    }
    foreach VT16 = [v4f16, v4bf16, v4i16] in {
      def : Pat<(VT64 (bitconvert (VT16 DPR:$src))), (VREV64d16 DPR:$src)>;
      def : Pat<(VT16 (bitconvert (VT64 DPR:$src))), (VREV64d16 DPR:$src)>;
    }
    def : Pat<(VT64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (VT64 DPR:$src))), (VREV64d8 DPR:$src)>;
  }
  foreach VT32 = [v2f32, v2i32] in {
    foreach VT16 = [v4f16, v4bf16, v4i16] in {
      def : Pat<(VT32 (bitconvert (VT16 DPR:$src))), (VREV32d16 DPR:$src)>;
      def : Pat<(VT16 (bitconvert (VT32 DPR:$src))), (VREV32d16 DPR:$src)>;
    }
    def : Pat<(VT32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (VT32 DPR:$src))), (VREV32d8 DPR:$src)>;
  }
  foreach VT16 = [v4f16, v4bf16, v4i16] in {
    def : Pat<(VT16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (VT16 DPR:$src))), (VREV16d8 DPR:$src)>;
  }

  // 128 bit conversions
  foreach VT64 = [v2f64, v2i64] in {
    foreach VT32 = [v4f32, v4i32] in {
      def : Pat<(VT64 (bitconvert (VT32 QPR:$src))), (VREV64q32 QPR:$src)>;
      def : Pat<(VT32 (bitconvert (VT64 QPR:$src))), (VREV64q32 QPR:$src)>;
    }
    foreach VT16 = [v8f16, v8bf16, v8i16] in {
      def : Pat<(VT64 (bitconvert (VT16 QPR:$src))), (VREV64q16 QPR:$src)>;
      def : Pat<(VT16 (bitconvert (VT64 QPR:$src))), (VREV64q16 QPR:$src)>;
    }
    def : Pat<(VT64  (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (VT64  QPR:$src))), (VREV64q8 QPR:$src)>;
  }
  foreach VT32 = [v4f32, v4i32] in {
    foreach VT16 = [v8f16, v8bf16, v8i16] in {
      def : Pat<(VT32 (bitconvert (VT16 QPR:$src))), (VREV32q16 QPR:$src)>;
      def : Pat<(VT16 (bitconvert (VT32 QPR:$src))), (VREV32q16 QPR:$src)>;
    }
    def : Pat<(VT32  (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (VT32  QPR:$src))), (VREV32q8 QPR:$src)>;
  }
  foreach VT16 = [v8f16, v8bf16, v8i16] in {
    def : Pat<(VT16  (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (VT16  QPR:$src))), (VREV16q8 QPR:$src)>;
  }
}

let Predicates = [HasNEON] in {
  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
  // rather than the more general 'ARMVectorRegCast' which would also
  // match some bitconverts. If we use the latter in cases where the
  // input and output types are the same, the bitconvert gets elided
  // and we end up generating a nonsense match of nothing.

  foreach DstVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
    foreach SrcVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
      def : Pat<(DstVT (ARMVectorRegCastImpl (SrcVT QPR:$src))),
                (DstVT QPR:$src)>;

  foreach DstVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
    foreach SrcVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
      def : Pat<(DstVT (ARMVectorRegCastImpl (SrcVT DPR:$src))),
                (DstVT DPR:$src)>;
}

// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
let Predicates = [IsBE,HasNEON] in {
  def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
            (VREV64q8 (VLD1q8 addrmode6:$addr))>;
  def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
  def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
            (VREV64q16 (VLD1q16 addrmode6:$addr))>;
  def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
}

// Fold extracting an element out of a v2i32 into a vfp register.
let Predicates = [HasNEON] in
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  defvar DestVT    = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar LoadInstr = !cast<Instruction>("VLD1d" # SrcTy);
  defvar UMovL     = !cast<Instruction>("VMOVLuv" # DestLanes # DestTy);
  defvar SMovL     = !cast<Instruction>("VMOVLsv" # DestLanes # DestTy);

  let AddedComplexity = 10 in {
    // Any-extending loads use the unsigned lengthening move.
    def _Any : Pat<(DestVT (!cast<PatFrag>("extloadvi" # SrcTy)
                             addrmode6:$addr)),
                   (UMovL (LoadInstr addrmode6:$addr))>,
               Requires<[HasNEON]>;

    def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadvi" # SrcTy)
                           addrmode6:$addr)),
                 (UMovL (LoadInstr addrmode6:$addr))>,
             Requires<[HasNEON]>;

    def _S : Pat<(DestVT (!cast<PatFrag>("sextloadvi" # SrcTy)
                           addrmode6:$addr)),
                 (SMovL (LoadInstr addrmode6:$addr))>,
             Requires<[HasNEON]>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                      (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  defvar DestVT = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UMovL  = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
  defvar SMovL  = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);

  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL (VLD1LNd32 addrmode6oneL32:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL (VLD1LNd32 addrmode6oneL32:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _S   : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (SMovL (VLD1LNd32 addrmode6oneL32:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  defvar DestVT  = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UMovL   = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
  defvar SMovL   = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);
  // Applied to the lane-loaded D register before it is lengthened.
  defvar LaneRev = !cast<Instruction>("VREV32d" # RevLanes);

  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL (LaneRev (VLD1LNd32 addrmode6oneL32:$addr,
                                              (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL (LaneRev (VLD1LNd32 addrmode6oneL32:$addr,
                                              (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _S   : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (SMovL (LaneRev (VLD1LNd32 addrmode6oneL32:$addr,
                                              (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0))>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  defvar DestVT = !cast<ValueType>("v" # DestLanes # DestTy);
  // First (inner) and second (outer) lengthening steps, unsigned and signed.
  defvar UMovL1 = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar SMovL1 = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar UMovL2 = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SMovL2 = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UMovL2
                   (EXTRACT_SUBREG
                     (UMovL1 (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UMovL2
                   (EXTRACT_SUBREG
                     (UMovL1 (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _S   : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (SMovL2
                   (EXTRACT_SUBREG
                     (SMovL1 (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  defvar DestVT  = !cast<ValueType>("v" # DestLanes # DestTy);
  // First (inner) and second (outer) lengthening steps, unsigned and signed.
  defvar UMovL1  = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar SMovL1  = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar UMovL2  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SMovL2  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
  // Applied to the lane-loaded D register before the first lengthening step.
  defvar LaneRev = !cast<Instruction>("VREV32d" # RevLanes);

  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UMovL2
                   (EXTRACT_SUBREG
                     (UMovL1 (LaneRev (VLD1LNd32 addrmode6oneL32:$addr,
                                                 (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UMovL2
                   (EXTRACT_SUBREG
                     (UMovL1 (LaneRev (VLD1LNd32 addrmode6oneL32:$addr,
                                                 (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _S   : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (SMovL2
                   (EXTRACT_SUBREG
                     (SMovL1 (LaneRev (VLD1LNd32 addrmode6oneL32:$addr,
                                                 (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  defvar DestVT = !cast<ValueType>("v" # DestLanes # DestTy);
  // First (inner) and second (outer) lengthening steps, unsigned and signed.
  defvar UMovL1 = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar SMovL1 = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar UMovL2 = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SMovL2 = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL2
                     (EXTRACT_SUBREG
                       (UMovL1 (VLD1LNd16 addrmode6:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL2
                     (EXTRACT_SUBREG
                       (UMovL1 (VLD1LNd16 addrmode6:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _S   : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (SMovL2
                     (EXTRACT_SUBREG
                       (SMovL1 (VLD1LNd16 addrmode6:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  defvar DestVT = !cast<ValueType>("v" # DestLanes # DestTy);
  // First (inner) and second (outer) lengthening steps, unsigned and signed.
  defvar UMovL1 = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar SMovL1 = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar UMovL2 = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SMovL2 = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL2
                     (EXTRACT_SUBREG
                       (UMovL1 (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UMovL2
                     (EXTRACT_SUBREG
                       (UMovL1 (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _S   : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (SMovL2
                     (EXTRACT_SUBREG
                       (SMovL1 (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
}

// v8i8 -> v8i16
defm : Lengthen_Single<"8", "i16", "8">;
// v4i16 -> v4i32
defm : Lengthen_Single<"4", "i32", "16">;
// v2i32 -> v2i64
defm : Lengthen_Single<"2", "i64", "32">;

let Predicates = [HasNEON,IsLE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [HasNEON,IsBE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
}

// Concatenating two D registers into a Q register is a REG_SEQUENCE placing
// $Dn in dsub_0 and $Dm in dsub_1, whatever the 128-bit result type.
let Predicates = [HasNEON] in {
  foreach VT = [v2i64, v4i32, v8i16, v16i8, v4f32, v8f16, v8bf16] in
    def : Pat<(VT (concat_vectors DPR:$Dn, DPR:$Dm)),
              (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// fmdhr / fmdlr map to VSETLNi32 on lane 1 / lane 0 respectively.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates
// "vand" with an immediate is aliased to the VBIC-immediate instructions,
// using the splat-"Not" immediate operand classes.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
        (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
        (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
        (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
        (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdAsm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdAsm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

// Writeback forms: "$addr!" (fixed) and "$addr, $Rm" (register).
def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list,
addrmode6align16:$addr, 8187 rGPR:$Rm, pred:$p)>; 8188def VLD1LNdWB_register_Asm_32 : 8189 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", 8190 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8191 rGPR:$Rm, pred:$p)>; 8192 8193 8194// VST1 single-lane pseudo-instructions. These need special handling for 8195// the lane index that an InstAlias can't handle, so we use these instead. 8196def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", 8197 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8198 pred:$p)>; 8199def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", 8200 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8201 pred:$p)>; 8202def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", 8203 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8204 pred:$p)>; 8205 8206def VST1LNdWB_fixed_Asm_8 : 8207 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", 8208 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8209 pred:$p)>; 8210def VST1LNdWB_fixed_Asm_16 : 8211 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", 8212 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8213 pred:$p)>; 8214def VST1LNdWB_fixed_Asm_32 : 8215 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", 8216 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8217 pred:$p)>; 8218def VST1LNdWB_register_Asm_8 : 8219 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", 8220 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8221 rGPR:$Rm, pred:$p)>; 8222def VST1LNdWB_register_Asm_16 : 8223 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", 8224 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8225 rGPR:$Rm, pred:$p)>; 8226def VST1LNdWB_register_Asm_32 : 8227 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", 8228 (ins VecListOneDWordIndexed:$list, 
addrmode6align32:$addr, 8229 rGPR:$Rm, pred:$p)>; 8230 8231// VLD2 single-lane pseudo-instructions. These need special handling for 8232// the lane index that an InstAlias can't handle, so we use these instead. 8233def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", 8234 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8235 pred:$p)>; 8236def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 8237 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8238 pred:$p)>; 8239def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 8240 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; 8241def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 8242 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8243 pred:$p)>; 8244def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 8245 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8246 pred:$p)>; 8247 8248def VLD2LNdWB_fixed_Asm_8 : 8249 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", 8250 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8251 pred:$p)>; 8252def VLD2LNdWB_fixed_Asm_16 : 8253 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 8254 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8255 pred:$p)>; 8256def VLD2LNdWB_fixed_Asm_32 : 8257 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 8258 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8259 pred:$p)>; 8260def VLD2LNqWB_fixed_Asm_16 : 8261 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 8262 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8263 pred:$p)>; 8264def VLD2LNqWB_fixed_Asm_32 : 8265 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 8266 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8267 pred:$p)>; 8268def VLD2LNdWB_register_Asm_8 : 8269 
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", 8270 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8271 rGPR:$Rm, pred:$p)>; 8272def VLD2LNdWB_register_Asm_16 : 8273 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 8274 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8275 rGPR:$Rm, pred:$p)>; 8276def VLD2LNdWB_register_Asm_32 : 8277 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 8278 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8279 rGPR:$Rm, pred:$p)>; 8280def VLD2LNqWB_register_Asm_16 : 8281 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 8282 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8283 rGPR:$Rm, pred:$p)>; 8284def VLD2LNqWB_register_Asm_32 : 8285 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 8286 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8287 rGPR:$Rm, pred:$p)>; 8288 8289 8290// VST2 single-lane pseudo-instructions. These need special handling for 8291// the lane index that an InstAlias can't handle, so we use these instead. 
// Plain VST2 single-lane forms: "$list, $addr".  The 'd' records use the
// VecListTwoD* list operands and the 'q' records the VecListTwoQ* ones;
// there is no q_8 record.
def VST2LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
        (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
             pred:$p)>;
def VST2LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
        (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST2LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
        (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST2LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
        (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST2LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
        (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
def VST2LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
        (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
             pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
        (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
        (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
        (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
        (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
def VST2LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
        (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
        (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
        (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
        (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
        (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These are provided as assembler
// pseudo-instructions rather than InstAliases.
// NOTE(review): the original comment attributes this to lane-index handling
// even for these all-lanes forms; the wording looks copied from the
// single-lane sections -- confirm the intended rationale.
// Within each group the operand list is the same for every element size, so
// the records are generated per size (_8, _16, _32), in that order.
foreach Sz = ["8", "16", "32"] in
  def VLD3DUPdAsm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr",
          (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
               pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VLD3DUPqAsm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr",
          (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
               pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
foreach Sz = ["8", "16", "32"] in
  def VLD3DUPdWB_fixed_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr!",
          (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
               pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VLD3DUPqWB_fixed_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr!",
          (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
               pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
foreach Sz = ["8", "16", "32"] in
  def VLD3DUPdWB_register_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr, $Rm",
          (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
               rGPR:$Rm, pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VLD3DUPqWB_register_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr, $Rm",
          (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
               rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Every record in this section uses addrmode6alignNone; only the indexed
// vector-list operand changes with element size and D/Q variant.
def VLD3LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
def VLD3LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
def VLD3LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// VLD3 multiple-structure assembler pseudo-instructions.  Every record here
// takes addrmode6align64 for its address; the 'd' records use VecListThreeD
// and the 'q' records VecListThreeQ.  Because the operand list does not vary
// with element size, each group is generated per size (_8, _16, _32), in
// that order.
// Plain forms: "$list, $addr".
foreach Sz = ["8", "16", "32"] in
  def VLD3dAsm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr",
          (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VLD3qAsm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr",
          (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
foreach Sz = ["8", "16", "32"] in
  def VLD3dWB_fixed_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr!",
          (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VLD3qWB_fixed_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr!",
          (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
foreach Sz = ["8", "16", "32"] in
  def VLD3dWB_register_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr, $Rm",
          (ins VecListThreeD:$list, addrmode6align64:$addr,
               rGPR:$Rm, pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VLD3qWB_register_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vld3${p}", "." # Sz, "$list, $addr, $Rm",
          (ins VecListThreeQ:$list, addrmode6align64:$addr,
               rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain VST3 single-lane forms: "$list, $addr".  Every record in this section
// uses addrmode6alignNone; only the indexed vector-list operand changes with
// element size and D/Q variant (there is no q_8 record).
def VST3LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
def VST3LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
def VST3LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// VST3 multiple-structure assembler pseudo-instructions.  Every record here
// takes addrmode6align64 for its address; the 'd' records use VecListThreeD
// and the 'q' records VecListThreeQ.  Because the operand list does not vary
// with element size, each group is generated per size (_8, _16, _32), in
// that order.
// Plain forms: "$list, $addr".
foreach Sz = ["8", "16", "32"] in
  def VST3dAsm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vst3${p}", "." # Sz, "$list, $addr",
          (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VST3qAsm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vst3${p}", "." # Sz, "$list, $addr",
          (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
foreach Sz = ["8", "16", "32"] in
  def VST3dWB_fixed_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vst3${p}", "." # Sz, "$list, $addr!",
          (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VST3qWB_fixed_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vst3${p}", "." # Sz, "$list, $addr!",
          (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
foreach Sz = ["8", "16", "32"] in
  def VST3dWB_register_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vst3${p}", "." # Sz, "$list, $addr, $Rm",
          (ins VecListThreeD:$list, addrmode6align64:$addr,
               rGPR:$Rm, pred:$p)>;
foreach Sz = ["8", "16", "32"] in
  def VST3qWB_register_Asm_ # Sz :
      NEONDataTypeAsmPseudoInst<"vst3${p}", "." # Sz, "$list, $addr, $Rm",
          (ins VecListThreeQ:$list, addrmode6align64:$addr,
               rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain VLD4 all-lanes forms: "$list, $addr".  The address operand depends on
// the element size (.8 -> addrmode6dupalign32, .16 -> addrmode6dupalign64,
// .32 -> addrmode6dupalign64or128); the 'd' records use VecListFourDAllLanes
// and the 'q' records VecListFourQAllLanes.
def VLD4DUPdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
def VLD4DUPqAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
def VLD4DUPdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
def VLD4DUPdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
             rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain VLD4 single-lane forms: "$list, $addr".  The address operand depends
// on the element size (.8 -> addrmode6align32, .16 -> addrmode6align64,
// .32 -> addrmode6align64or128); the 'd' records use the VecListFourD*
// indexed lists and the 'q' records the VecListFourQ* ones (there is no q_8
// record).
def VLD4LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VLD4LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VLD4LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;

// Writeback forms written with a trailing '!' on the address.
def VLD4LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
// Writeback forms taking an extra rGPR operand $Rm.
def VLD4LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain forms ("$list, $addr"). Every def in this group uses the same
// addrmode6align64or128or256 address operand; the d/q variants differ only in
// the list operand (VecListFourD vs. VecListFourQ).
def VLD4dAsm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qAsm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;

// WB_fixed forms: the address is written with a trailing "!".
def VLD4dWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;

// WB_register forms: an additional rGPR operand follows the address.
def VLD4dWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain forms: assembly operands are "$list, $addr". This group defines no
// .8 variant that takes a VecListFourQ* list.
def VST4LNdAsm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           pred:$p)>;
def VST4LNdAsm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNdAsm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
def VST4LNqAsm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNqAsm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;

// WB_fixed forms: same operands, but the address is written with a trailing
// "!" ("$list, $addr!").
def VST4LNdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           pred:$p)>;
def VST4LNdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
def VST4LNqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;

// WB_register forms: an additional rGPR operand follows the address
// ("$list, $addr, $Rm").
def VST4LNdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain forms ("$list, $addr"). Every def in this group uses the same
// addrmode6align64or128or256 address operand; the d/q variants differ only in
// the list operand (VecListFourD vs. VecListFourQ).
def VST4dAsm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4dAsm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4dAsm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4qAsm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4qAsm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4qAsm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;

// WB_fixed forms: the address is written with a trailing "!".
def VST4dWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4dWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4dWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4qWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4qWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VST4qWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;

// WB_register forms: an additional rGPR operand follows the address.
def VST4dWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;

// VMOV and VMVN (register) take an optional datatype suffix. The vmov aliases
// expand to VORR with $Vm supplied as both source operands.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// Each alias passes $Dm/$Dn (or $Qm/$Qn) to VCGE in swapped order.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
        (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
        (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
        (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
        (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
        (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
        (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
        (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 form is additionally predicated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
        (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
        (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
        (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
        (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
        (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
        (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
        (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
        (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 form is additionally predicated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
        (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// Each alias passes $Dm/$Dn (or $Qm/$Qn) to VCGT in swapped order.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
        (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
        (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
        (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
        (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
        (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
        (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
        (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 form is additionally predicated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
        (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
        (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
        (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
        (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
        (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
        (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
        (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
        (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 form is additionally predicated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
        (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// D-register forms first, then the Q-register forms.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Vd, #-imm" can be handled via "vmvn", and vice versa: each alias maps
// onto the opposite instruction using the nImmVMOVI32Neg immediate operand.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
        (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
        (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
        (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
        (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
// Each entry Mn below yields the alias Mn#"q" -> Mn, in list order.
foreach Mn = ["vbic", "vand", "veor", "vorr", "vmov", "vmvn"] in
  def : NEONMnemonicAlias<Mn # "q", Mn>;

// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

foreach Mn = ["vadd", "vsub", "vmin", "vmax", "vmul", "vabs", "vshl", "vshr",
              "vcvt", "vcle", "vceq", "vzip", "vswp"] in
  def : NEONMnemonicAlias<Mn # "q", Mn>;

// vrecpeq is only aliased with an explicit .f32 or .u32 suffix.
def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
        (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
        (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
let Predicates = [HasBF16, HasNEON] in {

// Common base for the vdot.bf16 encodings: forwards its arguments to N3Vnp
// with a fixed 0b1101 field, N3RegFrm and IIC_VDOTPROD. The two string
// arguments are passed empty; subclasses set AsmString directly.
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
  : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
          N3RegFrm, IIC_VDOTPROD, "", "", pattern> {
  let DecoderNamespace = "VFPV8";
}

// Vector-by-vector form. Selects int_arm_neon_bfdot; the accumulator input
// $Vd is tied to the result $dst.
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                ValueType InputTy>
  : BF16VDOT<0b11000, 0b00, Q,
             (outs RegTy:$dst),
             (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
             [(set (AccumTy RegTy:$dst),
                   (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                       (InputTy RegTy:$Vn),
                                       (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm";
  let DecoderNamespace = "VFPV8";
}

// Vector-by-lane form. The instruction itself carries no pattern; the Pat
// below matches int_arm_neon_bfdot whose last operand is a bitconverted
// ARMvduplane and emits the instruction with RHS as its $Vm operand.
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                     ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q,
                    (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                         DPR_VFP2:$Vm, VectorIndex32:$lane),
                    []> {
    // The single lane-index bit is encoded in Inst{5}.
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm$lane";
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumTy (int_arm_neon_bfdot
                (AccumTy RegTy:$Vd),
                (InputTy RegTy:$Vn),
                (InputTy (bitconvert
                           (AccumTy (ARMvduplane (AccumTy RegTy:$Vm),
                                                 VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                              VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

defm BF16VDOTI_VDOTD
  : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ
  : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16,
              (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// Matrix multiply-accumulate. The operand lists use RegTy, but the selection
// pattern is written directly in terms of QPR with v4f32/v8bf16 types.
class BF16MM<bit Q, RegisterClass RegTy, string opc>
  : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
          (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
          N3RegFrm, IIC_VDOTPROD, "", "",
          [(set (v4f32 QPR:$dst),
                (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                     (v8bf16 QPR:$Vn),
                                     (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm";
  let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;

// "vfma" # suffix (.bf16), Q-register vector form. OpNode is the operator the
// selection pattern matches; $Vd is tied to $dst.
class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
           [(set (v4f32 QPR:$dst),
                 (OpNode (v4f32 QPR:$Vd),
                         (v8bf16 QPR:$Vn),
                         (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

// "vfma" # suffix (.bf16), by-lane form. The instruction takes a DPR_8 $Vm
// plus a VectorIndex16 index; the Pat rewrites an ARMvduplane of a Q register
// into an EXTRACT_SUBREG, using DSubReg_i16_reg and SubReg_i16_lane on the
// lane operand to produce the subregister and index operands.
multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1,
                      (outs QPR:$dst),
                      (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                      IIC_VMACD, "vfma" # suffix, "bf16",
                      "$Vd, $Vn, $Vm$idx", "", []> {
    // The two index bits are split across Inst{5} (high) and Inst{3} (low).
    bits<2> idx;
    let Inst{5} = idx{1};
    let Inst{3} = idx{0};
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v8bf16 QPR:$Vn),
                   (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                                        VectorIndex16:$lane)))),
    (!cast<Instruction>(NAME)
        QPR:$Vd,
        QPR:$Vn,
        (EXTRACT_SUBREG QPR:$Vm, (DSubReg_i16_reg VectorIndex16:$lane)),
        (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

// vcvt.bf16.f32 with a DPR result and a QPR source; no selection pattern is
// attached here.
def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                    (outs DPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "",
                    []>;
}
// End of BFloat16 instructions