//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Immediate operands. Each one is an i32 Operand; most pair an AsmOperandClass
// (which only sets the matcher class Name) with an Operand that selects the
// print method and the parser match class.
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// The two "Not" splat operands set only a ParserMatchClass; no PrintMethod is
// overridden for them here.
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Parameterised matcher classes. The Name, PredicateMethod and RenderMethod
// strings are assembled from the bit sizes of the From and To value types,
// e.g. PredicateMethod becomes "isNEONmovReplicate<From.Size, To.Size>".
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

// Operands that use the parameterised matcher classes above.
class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Unlike the operands above, this one prints with printFPImmOperand and
// matches FPImmOperand (declared outside this section).
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector index operands. Each VectorIndexN is both an Operand and an ImmLeaf
// whose predicate accepts an immediate only when its value, cast to uint64_t,
// is below the stated bound (8, 4, 2 and 1 respectively). The cast makes
// negative values fail the comparison.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Vector register-list operands. Every AsmOperandClass below is parsed by
// parseVectorList and rendered by addVecListOperands; the accompanying
// RegisterOperand selects the register class and the list printer.

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
// NOTE(review): this operand is declared over DPair, whereas
// VecListDPairSpacedAllLanes further down uses DPairSpc -- confirm the
// difference is intentional.
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
// This is the only list operand in this group declared over DPairSpc.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                         "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed list operands. These are plain i32 Operands (not
// RegisterOperands) whose MIOperandInfo expands to two machine operands:
// a DPR register ($Vd) and an i32 immediate ($idx). They are rendered by
// addVecListIndexedOperands rather than addVecListOperands.

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Two-register indexed lists. As with the one-register forms, the machine
// operand layout is a single DPR ($Vd) plus an i32 lane index ($idx).

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Three-register indexed lists; the machine operand layout is again one DPR
// ($Vd) plus an i32 lane index ($idx).

// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Four-register indexed lists; the machine operand layout is one DPR ($Vd)
// plus an i32 lane index ($idx).

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Alignment-qualified load/store pattern fragments. Each fragment wraps the
// generic load or store node and adds a predicate on the memory node's
// getAlignment() value:
//   dword_*    : alignment >= 8
//   word_*     : alignment == 4 (exactly; larger alignments do not match)
//   hword_*    : alignment == 2
//   byte_*     : alignment == 1
//   non_word_* : alignment <  4
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// VTST: one integer result, two operands constrained to the same type.
def SDTARMVTST    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVTST>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// In both profiles the last operand is constrained to i32.
def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                            SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                            SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Shift-by-immediate nodes. SDTARMVSHIMM, used by several nodes below, is not
// defined in this section of the file.
def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

// VBSP: one vector result and three operands, all of the same type.
def NEONvbsp      : SDNode<"ARMISD::VBSP",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

// VEXT: two vector operands of the result type plus an i32 third operand.
def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Two-result shuffles: both results and both operands share one vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Table lookups: result and every operand are fixed to v8i8.
def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// The selection pattern matches a v2f64 load through word_alignedload.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                    IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
// The selection pattern matches a v2f64 store through word_alignedstore.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                    IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
557class VLDQPseudo<InstrItinClass itin> 558 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 559class VLDQWBPseudo<InstrItinClass itin> 560 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 561 (ins addrmode6:$addr, am6offset:$offset), itin, 562 "$addr.addr = $wb">; 563class VLDQWBfixedPseudo<InstrItinClass itin> 564 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 565 (ins addrmode6:$addr), itin, 566 "$addr.addr = $wb">; 567class VLDQWBregisterPseudo<InstrItinClass itin> 568 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 569 (ins addrmode6:$addr, rGPR:$offset), itin, 570 "$addr.addr = $wb">; 571 572class VLDQQPseudo<InstrItinClass itin> 573 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 574class VLDQQWBPseudo<InstrItinClass itin> 575 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 576 (ins addrmode6:$addr, am6offset:$offset), itin, 577 "$addr.addr = $wb">; 578class VLDQQWBfixedPseudo<InstrItinClass itin> 579 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 580 (ins addrmode6:$addr), itin, 581 "$addr.addr = $wb">; 582class VLDQQWBregisterPseudo<InstrItinClass itin> 583 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 584 (ins addrmode6:$addr, rGPR:$offset), itin, 585 "$addr.addr = $wb">; 586 587 588class VLDQQQQPseudo<InstrItinClass itin> 589 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 590 "$src = $dst">; 591class VLDQQQQWBPseudo<InstrItinClass itin> 592 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 593 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 594 "$addr.addr = $wb, $src = $dst">; 595 596let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 597 598// VLD1 : Vector Load (multiple single elements) 599class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> 600 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 601 (ins AddrMode:$Rn), IIC_VLD1, 602 "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { 603 let Rm = 0b1111; 604 let Inst{4} = Rn{4}; 605 let DecoderMethod = "DecodeVLDST1Instruction"; 606} 607class 
VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode> 608 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), 609 (ins AddrMode:$Rn), IIC_VLD1x2, 610 "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { 611 let Rm = 0b1111; 612 let Inst{5-4} = Rn{5-4}; 613 let DecoderMethod = "DecodeVLDST1Instruction"; 614} 615 616def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; 617def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; 618def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; 619def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; 620 621def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; 622def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; 623def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; 624def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; 625 626// ...with address register writeback: 627multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 628 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 629 (ins AddrMode:$Rn), IIC_VLD1u, 630 "vld1", Dt, "$Vd, $Rn!", 631 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 632 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 633 let Inst{4} = Rn{4}; 634 let DecoderMethod = "DecodeVLDST1Instruction"; 635 } 636 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 637 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, 638 "vld1", Dt, "$Vd, $Rn, $Rm", 639 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 640 let Inst{4} = Rn{4}; 641 let DecoderMethod = "DecodeVLDST1Instruction"; 642 } 643} 644multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 645 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 646 (ins AddrMode:$Rn), IIC_VLD1x2u, 647 "vld1", Dt, "$Vd, $Rn!", 648 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 649 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
650 let Inst{5-4} = Rn{5-4}; 651 let DecoderMethod = "DecodeVLDST1Instruction"; 652 } 653 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 654 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 655 "vld1", Dt, "$Vd, $Rn, $Rm", 656 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 657 let Inst{5-4} = Rn{5-4}; 658 let DecoderMethod = "DecodeVLDST1Instruction"; 659 } 660} 661 662defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; 663defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; 664defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; 665defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; 666defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 667defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 668defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 669defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 670 671// ...with 3 registers 672class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode> 673 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), 674 (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, 675 "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { 676 let Rm = 0b1111; 677 let Inst{4} = Rn{4}; 678 let DecoderMethod = "DecodeVLDST1Instruction"; 679} 680multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 681 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 682 (ins AddrMode:$Rn), IIC_VLD1x2u, 683 "vld1", Dt, "$Vd, $Rn!", 684 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 685 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
686 let Inst{4} = Rn{4}; 687 let DecoderMethod = "DecodeVLDST1Instruction"; 688 } 689 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 690 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 691 "vld1", Dt, "$Vd, $Rn, $Rm", 692 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 693 let Inst{4} = Rn{4}; 694 let DecoderMethod = "DecodeVLDST1Instruction"; 695 } 696} 697 698def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; 699def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; 700def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; 701def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; 702 703defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; 704defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; 705defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; 706defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; 707 708def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 709def VLD1d8TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 710def VLD1d8TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 711def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 712def VLD1d16TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 713def VLD1d16TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 714def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 715def VLD1d32TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 716def VLD1d32TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 717def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 718def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 719def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 720 721def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 722def VLD1q8HighTPseudo_UPD : 
VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 723def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 724def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 725def VLD1q16HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 726def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 727def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 728def VLD1q32HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 729def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 730def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 731def VLD1q64HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 732def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 733 734// ...with 4 registers 735class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> 736 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 737 (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, 738 "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { 739 let Rm = 0b1111; 740 let Inst{5-4} = Rn{5-4}; 741 let DecoderMethod = "DecodeVLDST1Instruction"; 742} 743multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 744 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 745 (ins AddrMode:$Rn), IIC_VLD1x2u, 746 "vld1", Dt, "$Vd, $Rn!", 747 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 748 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  // Writeback form taking an extra rGPR operand ($Rm).
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

// VLD1D4 instances, one per data-type suffix.
def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

// VLD1D4WB instances; each defm yields a _fixed and a _register record.
defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

// Pseudo-instructions; all share itinerary IIC_VLD1x4 and Sched WriteVLD4.
def VLD1d8QPseudo             : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d8QPseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d8QPseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo            : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo            : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo            : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD  :
VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo      : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// VLD2 : Vector Load (multiple 2-element structures)
// The register-list operand type, itinerary and addressing-mode operand are
// parameters; Rm is fixed to 0b1111 and Inst{5-4} come from Rn{5-4}.
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

// D-register-pair forms (op11_8 = 0b1000).
def VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                    addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                    addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                    addrmode6align64or128>, Sched<[WriteVLD2]>;

// Four-D-register forms (op11_8 = 0b0011).
def VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                    addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                    addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD,
IIC_VLD2x2,
                    addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;

// ...with address register writeback:
// _fixed is the "$Rn!" form with Rm fixed to 0b1101; _register takes an extra
// rGPR operand ($Rm). Both tie the updated address to $wb.
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def
VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers
def VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

// VLD3 : Vector Load (multiple 3-element structures)
// Three separate DPR outputs; Rm is fixed to 0b1111 and Inst{4} comes from
// Rn{4}.
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4>
op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...with double-spaced registers:
// Same classes as the VLD3d* forms, with op11_8 = 0b0101 instead of 0b0100.
def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd,
DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
    Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

// ...with address register writeback:
// Adds the GPR:$wb output (tied to $Rn.addr) and an am6offset:$Rm input.
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers:
// Same classes as the VLD4d* forms, with op11_8 = 0b0001 instead of 0b0000.
def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32Pseudo_UPD :
VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each class ties the vector input $src to the output $dst; the *WB variants
// additionally take an am6offset operand and tie $addr.addr to $wb.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
// Selection pattern: vector_insert of an i32 produced by LoadOp into $src at
// $lane. $src is tied to $Vd.
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but addressed through addrmode6oneL32, and carrying
// Sched<[WriteVLD1]>.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register pseudo with the equivalent vector_insert pattern.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
                                                    Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

// The lane index is encoded in the upper bits of Inst{7-4}; the 16- and
// 32-bit forms also copy Rn{5-4} into Inst{5-4}.
def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v4f16 DPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8f16 QPR:$src),
                         (f16 (load
addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4bf16 DPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8bf16 QPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
}


let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
// No selection pattern; $src is tied to $Vd and $Rn.addr to $wb.
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // Both Inst{5} and Inst{4} are driven from Rn{4}.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
// Two DPR outputs, each tied to the matching $srcN input.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo :
VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
// Three DPR outputs, each tied to the matching $srcN input.
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []>, Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

def VLD3LNq16_UPD :
VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
// Four DPR outputs, each tied to the matching $srcN input.
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
// The 32-bit forms additionally copy Rn{5} into Inst{5}.
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1,
0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
// Selection pattern: ARMvdup of an i32 produced by LoadOp from $Rn.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

// An f32 load splatted to v2f32 selects the 32-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
}

// As VLD1DUP, but the destination is VecListDPairAllLanes.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

// An f32 load splatted to v4f32 selects the 32-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
}

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
// Writeback variants with a VecListDPairAllLanes destination.
// NOTE(review): only _fixed carries Sched<[WriteVLD1]>; _register has no Sched
// annotation -- confirm whether that asymmetry is intended.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
// The register-list operand type and addressing-mode operand are parameters;
// Rm is fixed to 0b1111 and Inst{4} comes from Rn{4}.
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def
VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

// Even/odd pseudo-instructions; all share itinerary IIC_VLD2dup and Sched
// WriteVLD2.
def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// _fixed is the "$Rn!" form with Rm fixed to 0b1101; the updated address is
// tied to $wb.
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1510 let Inst{4} = Rn{4}; 1511 let DecoderMethod = "DecodeVLD2DupInstruction"; 1512 } 1513 def _register : NLdSt<1, 0b10, 0b1101, op7_4, 1514 (outs VdTy:$Vd, GPR:$wb), 1515 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, 1516 "vld2", Dt, "$Vd, $Rn, $Rm", 1517 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1518 let Inst{4} = Rn{4}; 1519 let DecoderMethod = "DecodeVLD2DupInstruction"; 1520 } 1521} 1522 1523defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, 1524 addrmode6dupalign16>; 1525defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, 1526 addrmode6dupalign32>; 1527defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, 1528 addrmode6dupalign64>; 1529 1530defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, 1531 addrmode6dupalign16>; 1532defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, 1533 addrmode6dupalign32>; 1534defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, 1535 addrmode6dupalign64>; 1536 1537def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1538def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1539def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1540def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1541def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1542def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1543 1544// VLD3DUP : Vector Load (single 3-element structure to all lanes) 1545class VLD3DUP<bits<4> op7_4, string Dt> 1546 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1547 (ins addrmode6dup:$Rn), IIC_VLD3dup, 1548 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>, 1549 Sched<[WriteVLD2]> { 1550 let Rm = 0b1111; 1551 let Inst{4} = 0; 1552 let DecoderMethod = "DecodeVLD3DupInstruction"; 1553} 1554 
// Concrete VLD3DUP records, one per element size.  Only the op7_4 bit pattern
// and the data-type suffix differ between them.
def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

// Pseudo records for the forms above, derived from VLDQQPseudo.
def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
// These differ from the d-forms only in the third op7_4 element (1, not 0).
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// Even/Odd pseudo records for the q-forms, derived from VLDQQQQPseudo.
def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// VLD3DUPWB adds a GPR:$wb result tied to the address operand
// ("$Rn.addr = $wb") and takes the post-increment as am6offset:$Rm.
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm),
          IIC_VLD3dupu,
          "vld3", Dt,
          "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb",
          []>,
    Sched<[WriteVLD2]> {
  // Bit 4 of the encoding is fixed to zero for this class.
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

// Writeback pseudo records for the d-forms.
def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// Writeback pseudo records for the q-forms (Odd variants only).
def VLD3DUPq8OddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
// No writeback: Rm is fixed to 0b1111 and Inst{4} is taken from bit 4 of the
// $Rn operand encoding.
// NOTE(review): unlike VLD4DUPWB, this class carries no Sched<...>
// annotation -- confirm whether that is intentional.
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn),
          IIC_VLD4dup,
          "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn",
          "",
          []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

// The 32-bit forms leave the second op7_4 element open ('?') and fill
// Inst{6} from bit 5 of the $Rn operand encoding instead.
def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// Same operands as VLD4DUP plus a GPR:$wb result tied to the address operand
// and an am6offset:$Rm post-increment input.
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD4dupu,
          "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb",
          []>,
    Sched<[WriteVLD2]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

def VLD4DUPq8OddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Naming used by the pseudo classes below, as visible from their operands:
//   Q / QQ / QQQQ  - $src is a QPR / QQPR / QQQQPR register operand.
//   WB             - adds a GPR:$wb result tied to $addr.addr and an
//                    am6offset:$offset input.
//   WBfixed        - writeback with no offset operand.
//   WBregister     - writeback with an rGPR:$offset input.

// --- single Q register ---
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QPR:$src),
                itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src),
                itin, "$addr.addr = $wb">;

// --- QQ register operand ---
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQPR:$src),
                itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src),
                itin, "$addr.addr = $wb">;

// --- QQQQ register operand ---
class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQQQPR:$src),
                itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                itin, "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
// VST1D stores a one-register list; Rm is fixed to 0b1111 and Inst{4} comes
// from bit 4 of the $Rn operand encoding.
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0111, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1,
          "vst1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// VST1Q stores a D-register pair; Inst{5-4} come from bits 5-4 of $Rn.
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b1010, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2,
          "vst1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// Each multiclass yields a _fixed record ("$Rn!", Rm fixed to 0b1101) and a
// _register record (explicit rGPR:$Rm).
// NOTE(review): these store records use the load itinerary IIC_VLD1u --
// looks like a carry-over from the VLD1 definitions; confirm before changing.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0111, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListOneD:$Vd),
            IIC_VLD1u,
            "vst1", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register
    : NLdSt<0, 0b00, 0b0111, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
            IIC_VLD1u,
            "vst1", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
// NOTE(review): IIC_VLD1x2u is a load itinerary on a store -- confirm.
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b1010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListDPair:$Vd),
            IIC_VLD1x2u,
            "vst1", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register
    : NLdSt<0, 0b00, 0b1010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
            IIC_VLD1x2u,
            "vst1", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3,
          "vst1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): IIC_VLD1x3u is a load itinerary on a store -- confirm.
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0110, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListThreeD:$Vd),
            IIC_VLD1x3u,
            "vst1", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register
    : NLdSt<0, 0b00, 0b0110, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
            IIC_VLD1x3u,
            "vst1", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST3]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

// Pseudo records for the 3-register VST1 forms: plain, WB_fixed and
// WB_register per element size.
def VST1d8TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo  : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

// NOTE(review): the _UPD pseudos below use IIC_VST1x3 while the d-form
// writeback pseudos above use IIC_VST1x3u -- confirm this is intended.
def VST1q8HighTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4,
          "vst1", Dt, "$Vd, $Rn", "",
          []>,
    Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): IIC_VLD1x4u is a load itinerary on a store -- confirm.
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListFourD:$Vd),
            IIC_VLD1x4u,
            "vst1", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register
    : NLdSt<0, 0b00, 0b0010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
            IIC_VLD1x4u,
            "vst1", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

// Pseudo records for the 4-register VST1 forms: plain, WB_fixed and
// WB_register per element size.
def VST1d8QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo  : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

// NOTE(review): the _UPD pseudos below use IIC_VST1x4 while the d-form
// writeback pseudos above use IIC_VST1x4u -- confirm this is intended.
def VST1q8HighQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

// VST2 : Vector Store (multiple 2-element structures)
// The register-list operand type, itinerary and addressing operand are all
// parameters; the class itself attaches no Sched<...>, so each def supplies
// its own where it wants one.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
            InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins AddrMode:$Rn, VdTy:$Vd),
          itin,
          "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8
  : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
         addrmode6align64or128>,
    Sched<[WriteVST2]>;
def VST2d16
  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
         addrmode6align64or128>,
    Sched<[WriteVST2]>;
def VST2d32
  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
         addrmode6align64or128>,
    Sched<[WriteVST2]>;

def VST2q8
  : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVST4]>;
def VST2q16
  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVST4]>;
def VST2q32
  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVST4]>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;

// ...with address register writeback:
// NOTE(review): both VST2 writeback multiclasses use the load itinerary
// IIC_VLD1u -- confirm before changing.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VdTy:$Vd),
            IIC_VLD1u,
            "vst2", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd),
            IIC_VLD1u,
            "vst2", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0011, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListFourD:$Vd),
            IIC_VLD1u,
            "vst2", Dt, "$Vd, $Rn!",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register
    : NLdSt<0, 0b00, 0b0011, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
            IIC_VLD1u,
            "vst2", Dt, "$Vd, $Rn, $Rm",
            "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb
  : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
            addrmode6align64or128>;
defm VST2d16wb
  : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
            addrmode6align64or128>;
defm VST2d32wb
  : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
            addrmode6align64or128>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q8PseudoWB_register
  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_register
  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_register
  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;

// ...with double-spaced registers
// NOTE(review): unlike VST2d*/VST2q*, the non-writeback VST2b* defs attach
// no Sched<...> -- confirm whether that is intentional.
def VST2b8
  : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
         addrmode6align64or128>;
def VST2b16
  : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
         addrmode6align64or128>;
def VST2b32
  : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
         addrmode6align64or128>;
defm VST2b8wb
  : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
            addrmode6align64or128>;
defm VST2b16wb
  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
            addrmode6align64or128>;
defm VST2b32wb
  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
            addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
// The three source registers are separate DPR operands printed as a braced
// list.
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>,
    Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVST3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...with double-spaced registers:
// Same classes as the d-forms, selected with op11_8 = 0b0101.
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// VST4 : Vector Store (multiple 4-element structures)
// The four source registers are separate DPR operands printed as a braced
// list; Inst{5-4} come from bits 5-4 of the $Rn operand encoding.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []>,
    Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVST4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...with double-spaced registers:
// Same classes as the d-forms, selected with op11_8 = 0b0001.
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Pseudo-instruction classes for the single-lane stores.  The register group
// being stored is passed as one operand ($src) of class QPR, QQPR or QQQQPR.
// The only template argument is the itinerary.  The *WB variants additionally
// take an am6offset operand and define $wb, tied to the address operand via
// the "$addr.addr = $wb" constraint.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

//   VST1LN   : Vector Store (single element from one lane)
// The selection pattern stores the result of ExtractOp on (Ty DPR:$Vd) at
// $lane through StoreOp.  Bits written as '?' in op7_4 are filled in by the
// "let Inst{...}" lines of each def below.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
     Sched<[WriteVST1]> {
  // No offset operand in this form: Rm is fixed to 0b1111.
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register counterpart of VST1LN, expressed as a pseudo with the same kind
// of store-of-extracted-lane pattern on QPR:$src.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

// The lane index occupies the top bits of Inst{7-4}; the wider the element,
// the fewer lane bits are encoded.  The remaining '?' bits are taken from Rn.
// NOTE(review): Rn{4} / Rn{5-4} look like the alignment part of the address
// operand encoding -- confirm against the addrmode6 encoder.
def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Floating-point lane stores reuse the integer instructions of the same
// element width.
let Predicates = [HasNEON] in {
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
}

// ...with address register writeback:
// Same as VST1LN, but takes an am6offset:$Rm operand and defines GPR:$wb,
// which is tied to the address operand.
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins AdrMode:$Rn, am6offset:$Rm,
           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb",
          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                  AdrMode:$Rn, am6offset:$Rm))]>,
     Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

// The multi-register lane stores below have no selection patterns (empty
// pattern lists), so their store behaviour is declared explicitly.
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

//   VST2LN   : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
          "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
// (Inst{4} is already assigned by the VST2LN class; the assignment is
// repeated in these two defs.)
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
// NOTE(review): unlike VST2LN and the *_UPD pseudos below, this class has no
// Sched<[...]> mixin; the same holds for VST3LNWB and VST4LNWB.  Confirm
// whether that is intentional.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

//   VST3LN   : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

// Scheduling info added to match every other VST3/VST4 lane-store pseudo in
// this section (the d-register pseudos above and the *_UPD pseudos below all
// use WriteVST2).
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST3lnu, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

//   VST4LN   : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
          "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST4lnu, "vst4", Dt,
  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
// Little-endian: select the vld1/vst1 form whose element size equals the
// known alignment (16-bit or 8-bit).
let Predicates = [IsLE,HasNEON] in {
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>;
}
// Big-endian: every non-word-aligned f64 access uses the 64-bit element form.
let Predicates = [IsBE,HasNEON] in {
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>;
}

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
// Doubleword-aligned v2f64 accesses: 64-bit element vld1/vst1, available
// whenever NEON is present.
let Predicates = [HasNEON] in {
  def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
            (VLD1q64 addrmode6:$addr)>;
  def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q64 addrmode6:$addr, QPR:$value)>;
}

// Less-aligned v2f64 accesses, little-endian only: the element size of the
// selected vld1/vst1 matches the alignment class (word, halfword, byte).
let Predicates = [IsLE, HasNEON] in {
  // Word aligned.
  def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
            (VLD1q32 addrmode6:$addr)>;
  def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q32 addrmode6:$addr, QPR:$value)>;

  // Halfword aligned.
  def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
            (VLD1q16 addrmode6:$addr)>;
  def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q16 addrmode6:$addr, QPR:$value)>;

  // Byte aligned.
  def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
            (VLD1q8 addrmode6:$addr)>;
  def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q8 addrmode6:$addr, QPR:$value)>;
}

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// N2VD: one D-register source ($Vm), one D-register result ($Vd), selected
// from the SDNode OpNode.  Uses the IIC_VUNAD itinerary.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;

// N2VQ: Q-register form of N2VD.  It differs in the register class, the
// IIC_VUNAQ itinerary, and the constant passed after op11_7 (1 instead of 0).
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Unlike N2VD/N2VQ the itinerary is a template argument and the operator is
// any SDPatternOperator.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// N2VDIntnp / N2VQIntnp: 2-register intrinsic classes built on N2Vnp.  The
// D form passes 0 and the Q form passes 1 as the argument following op7.
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
// The bit that N2VQIntnp fixes to 1 is supplied by the caller as op6.  Note
// that op6 precedes op7 in this class's template list but follows it in the
// N2Vnp argument list.
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
// The extra input $src is tied to $Vd and is the first operand of IntOp.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd),
          (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// N2VN: Q-register source, D-register result, selected from an SDNode.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
// Same operand shape as N2VN; the operator is any SDPatternOperator.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// D-register source, Q-register result.  The type arguments are ordered
// TyQ, TyD here, the reverse of the narrowing classes.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: $Vd and $Vm are outputs, each tied to
// one of the two inputs.  No selection pattern is attached.  The D form
// always uses IIC_VPERMD; the Q form takes its itinerary as an argument.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
// $Vd = OpNode($Vn, $Vm); commutability is chosen by the instantiation.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd),
         (ins DPR:$Vn, DPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// By-lane forms: the second source is one lane of $Vm duplicated across the
// vector (ARMvduplane).  $Vm is restricted to DPR_VFP2 for 32-bit lanes and
// DPR_8 for 16-bit lanes.  These are never commutable.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// 16-bit lane form; the itinerary is fixed to IIC_VMULi16D.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Q-register counterparts of the classes above.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd),
         (ins QPR:$Vn, QPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// In the Q by-lane forms the lane source is still a D register of type OpTy,
// while the duplicated vector and the result have type ResTy.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// 16-bit lane form; the itinerary is fixed to IIC_VMULi16Q.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: $Vd = IntOp($Vn, $Vm) on D registers.  The instruction format and
// commutability are chosen by the instantiation.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// N3VDIntnp: D-register intrinsic built on N3Vnp.
// The Format argument f is forwarded to N3Vnp, as in the Q-register twin
// N3VQIntnp (previously it was accepted but replaced by a hard-coded
// N3RegFrm).
// NOTE(review): the Commutable parameter is not referenced in this class
// (nor in N3VQIntnp); confirm whether isCommutable should be set from it.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane intrinsic forms: the second source is one lane of $Vm duplicated
// across the vector (ARMvduplane).  Never commutable.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VDIntSh: like N3VDInt, but $Vm comes before $Vn in the operand list, in
// the assembly string and in the pattern; the two-operand alias therefore
// ties $Vm (not $Vn) to $Vd.  Never commutable.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// N3VQInt: Q-register counterpart of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// N3VQIntnp: Q-register intrinsic built on N3Vnp.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
// The extra input $src is tied to $Vd and is the first operand of IntOp.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd),
          (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Q-register by-lane intrinsics: the lane comes from a D register of type
// OpTy; the duplicated vector and the result have type ResTy.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
// $Vm comes before $Vn in the operand list, assembly string and pattern; the
// two-operand alias ties $Vm to $Vd.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm, QPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
  let TwoOperandAliasConstraint = "$Vm = $Vd";
}

// Multiply-Add/Sub operations: double- and quad-register.
// $Vd = OpNode($src1, MulOp($Vn, $Vm)), with the accumulator $src1 tied to
// $Vd.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// By-lane multiply-accumulate: the multiplier is one lane of $Vm duplicated
// across the vector.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

// Q-register multiply-accumulate forms.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                          (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                              imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                          (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                              imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// $Vd = OpNode($src1, IntOp($Vn, $Vm)); the accumulator $src1 is tied to $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;

// Q-register form of N3VDIntOp.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// $Vd = IntOp($src1, $Vn, $Vm), all operands of type OpTy; $src1 is tied to
// $Vd.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Q-register form of N3VDInt3.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator and result (TyQ), D-register multiplicands (TyD):
// $Vd = OpNode($src1, MulOp($Vn, $Vm)).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;

// By-lane variant with 32-bit lanes: the second multiplicand is one lane of
// $Vm duplicated across a TyD vector.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                 (OpNode (TyQ QPR:$src1),
                         (TyQ (MulOp (TyD DPR:$Vn),
                                     (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                       imm:$lane))))))]>;
// Long multiply-accumulate by scalar lane, 16-bit-lane flavour.
// Pattern: Vd = OpNode(src1, MulOp(Vn, ARMvduplane(Vm, lane))).  $Vm is
// restricted to the DPR_8 register class, $lane is a VectorIndex16 operand,
// and $src1 is tied to $Vd.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                (OpNode (TyQ QPR:$src1),
                        (TyQ (MulOp (TyD DPR:$Vn),
                                    (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                      imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Pattern: Vd = OpNode(src1, ExtOp(IntOp(Vn, Vm))).  IntOp works on TyD
// values; ExtOp widens its result to TyQ before OpNode combines it with the
// tied accumulator $src1.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                        (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// Pattern: Vd = IntOp(src1, Vn, Vm) with $src1/$Vd typed TyQ (Q register) and
// $Vn/$Vm typed TyD (D registers).  $src1 is tied to $Vd.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// Scalar-lane variant: the third IntOp operand is ARMvduplane of lane $lane of
// $Vm (DPR_VFP2 register class, VectorIndex32 lane operand).
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Same as N3VLInt3SL, but built on N3VLane16 with $Vm in DPR_8 and a
// VectorIndex16 lane operand.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Pattern: Vd = IntOp(Vn, Vm) with Q-register sources typed TyQ and a
// D-register result typed TyD.  The itinerary is fixed to IIC_VBINi4D.
// Commutable is forwarded to isCommutable.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Pattern: Vd = OpNode(Vn, Vm) with D-register sources typed TyD and a
// Q-register result typed TyQ.  Commutable is forwarded to isCommutable.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long operation by scalar lane: the second operand is ARMvduplane of lane
// $lane of $Vm (DPR_VFP2 register class, VectorIndex32 lane operand).
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Same as N3VLSL, but built on N3VLane16 with $Vm in DPR_8 and a
// VectorIndex16 lane operand.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Pattern: Vd = OpNode(ExtOp(Vn), ExtOp(Vm)).  Each D-register source (TyD)
// is widened to TyQ by ExtOp before OpNode is applied.  Commutable is
// forwarded to isCommutable.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDPatternOperator ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Pattern: Vd = ExtOp(IntOp(Vn, Vm)).  IntOp is evaluated at type TyD and its
// result is then widened to TyQ.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Pattern: Vd = IntOp(Vn, Vm) with TyD sources and a TyQ result.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Built on N3Vnp instead of N3V; takes op27_23 and op6 explicitly and passes
// no assembly-operand or constraint strings.
// NOTE(review): the Commutable parameter is accepted but not used anywhere in
// this class (the predicated N3VLInt assigns it to isCommutable) -- confirm
// whether that is intentional.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Long intrinsic by scalar lane: Vd = IntOp(Vn, ARMvduplane(Vm, lane)), with
// $Vm in DPR_VFP2 and a VectorIndex32 lane operand.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Same as N3VLIntSL, but built on N3VLane16 with $Vm in DPR_8 and a
// VectorIndex16 lane operand.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Wide 3-register operations.
// Pattern: Vd = OpNode(Vn, ExtOp(Vm)).  $Vn is already a Q register (TyQ);
// only the D-register $Vm (TyD) is widened.  The itinerary is fixed to
// IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDPatternOperator ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// Pattern: Vd = IntOp(Vm), source typed OpTy and result typed ResTy.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// NOTE(review): the Q-register form uses the same IIC_VSHLiD itinerary as the
// D-register form above -- confirm this is intended.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pattern: Vd = IntOp(src1, Vm).  $src1 is typed ResTy and tied to $Vd; $Vm is
// typed OpTy.  The D form uses itinerary IIC_VPALiD.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Q-register form; uses itinerary IIC_VPALiQ.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
// Pattern: Vd = OpNode(Vm, SIMM), where the shift amount is matched as an i32
// immediate and ImmTy is the operand class of $SIMM.  Both classes get the
// two-operand alias constraint "$Vm = $Vd" from the enclosing let.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Pattern: Vd = OpNode(Vm, SIMM) with a D-register source typed OpTy and a
// Q-register result typed ResTy.  The immediate is matched through the ImmTy
// operand itself (ImmTy:$SIMM) rather than a plain imm.  The format is
// N2RegVShLFrm and the itinerary is fixed to IIC_VSHLiD; op6 is supplied by
// the caller.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate.
// Pattern: Vd = OpNode(Vm, SIMM) with a Q-register source typed OpTy and a
// D-register result typed ResTy.  The immediate is matched as
// (i32 ImmTy:$SIMM); the format is N2RegVShRFrm.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                          (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Pattern: Vd = add(src1, ShOp(Vm, SIMM)).  $src1 is tied to $Vd; the format
// is N2RegVShRFrm.  Both classes also get the two-operand alias constraint
// "$Vm = $Vd" from the enclosing let.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// NOTE(review): the Q-register form uses the D itinerary IIC_VPALiD, whereas
// N2VQShIns below uses a Q itinerary (IIC_VSHLiQ) -- confirm this is intended.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
// Pattern: Vd = ShOp(src1, Vm, SIMM).  $src1 is tied to $Vd, and the
// instruction format f is supplied by the caller.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// Pattern: Vd = IntOp(Vm, SIMM), source typed OpTy and result typed ResTy.
// The immediate operand class is neon_vcvt_imm32 and the format is NVCVTFrm.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements
//
// In the multiclasses below the data-type suffix of each def is formed by
// pasting the element size onto Dt (Dt # "8", Dt # "16", ...).

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons (ARMvcmpz with condition fragment fc).
//   source operand element sizes of 8, 16 and 32 bits, plus f32 and f16
//   variants that additionally set Inst{10}:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, PatFrag fc> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "8", asm, "",
                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "8", asm, "",
                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register comparisons.
// Pattern: Vd = ARMvcmp(Vn, Vm, fc), sources typed OpTy and result typed
// ResTy.  Q-register form.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// D-register form of N3VQ_cmp.
class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// 3-register comparisons for element sizes of 8, 16 and 32 bits.  The 8- and
// 16-bit defs share the *16 itineraries; the 32-bit defs use the *32 ones.
multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       PatFrag fc, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "8", v8i8, v8i8, fc, Commutable>;
  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "16", v4i16, v4i16, fc, Commutable>;
  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, Dt # "32", v2i32, v2i32, fc, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "8", v16i8, v16i8, fc, Commutable>;
  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "16", v8i16, v8i16, fc, Commutable>;
  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, Dt # "32", v4i32, v4i32, fc, Commutable>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
// (defs are named after the narrowed result type.)
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "16", v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "32", v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "64", v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "16", v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "32", v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "64", v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// (defs are named after the widened result type.)
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, Dt # "8", v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, Dt # "16", v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, Dt # "32", v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, Dt # "8", v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, Dt # "16", v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, Dt # "32", v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "8", v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "16", v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, Dt # "32", v4i32, v4i32, OpNode, Commutable>;
}

// 3-register operations by scalar lane, 16- and 32-bit elements.  The data
// type strings are fixed to "i16"/"i32" rather than derived from a Dt prefix.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// (itinD / itinQ are used for every element size.)
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, Dt # "64", v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, Dt # "64", v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, Dt # "16", v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "32", v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "16", v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "32", v4i32, v4i32, IntOp, Commutable>;
}
// Same element sizes, built on the N3VDIntSh / N3VQIntSh classes (no
// Commutable parameter).
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
}

// 3-register intrinsics by scalar lane, 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, Dt # "16", v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, Dt # "32", v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, Dt # "16", v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// (the 8-bit defs reuse the 16-bit itineraries.)
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, Dt # "8", v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "8", v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
// (the 64-bit defs reuse the 32-bit itineraries.)
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "64", v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "64", v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "64", v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "64", v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, Dt # "16", v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, Dt # "32", v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, Dt # "64", v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

// Source element sizes of 8, 16 and 32 bits; defs are named after the
// widened result type.  The 8- and 16-bit sources share itin16.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, Dt # "8", v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, Dt # "16", v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, Dt # "32", v2i64, v2i32, OpNode, Commutable>;
}

// Long operations by scalar lane.  Here the defs are named after the source
// (D-register) type, while the result types are v4i32 / v2i64.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       Dt # "16", v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     Dt # "32", v2i64, v2i32, OpNode>;
}

// Long operations whose operands are explicitly extended with ExtOp.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8",
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16",
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32",
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16", v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32", v2i64, v2i32, IntOp, Commutable>;
}

// Long intrinsics by scalar lane; defs are named after the source type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, Dt # "16", v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, Dt # "32", v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8", v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// Long 3-register intrinsics that also take an explicit extend node (ExtOp).
// One def per source element size 8/16/32 (size field 0b00/0b01/0b10); each
// pairs a 128-bit result type with the 64-bit source type of half the element
// width.  All three sizes share the single `itin` itinerary.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
// (No itinerary is passed through here; N3VW is instantiated without one.)
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDPatternOperator ExtOp,
                    bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is fixed to `mul`; OpNode is the operation combined with
// it.  The 8-bit defs reuse the 16-bit itineraries (itinD16 / itinQ16).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// SL16 / SL variants of the multiply-op classes, 16- and 32-bit elements only.
// The Q forms pass a second, 64-bit vector type (v4i16 / v2i32) after the
// result type -- presumably the type of the scalar (lane) source register;
// confirm against N3VQMulOpSL / N3VQMulOpSL16.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// A single D itinerary and a single Q itinerary are used for every size.
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

//   element sizes of 8, 16 and 32 bits:
// Inherits the 16/32-bit defs from N3VInt3_HS and adds the 8-bit ones, which
// reuse the 16-bit itineraries.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// Unlike N3VMulOp_QHS, the multiply node (MulOp) is a template argument here.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// SL16 / SL variants of the long multiply-op classes.  Itineraries are fixed
// to IIC_VMACi16D / IIC_VMACi32D rather than taken as template arguments.
// Def names carry the source type (v4i16 / v2i32); results are v4i32 / v2i64.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// SL16 / SL variants of the long 3-argument intrinsics.  As in
// N3VLMulOpSL_HS, the itineraries are fixed (IIC_VMACi16D / IIC_VMACi32D) and
// the defs are named after the source type.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  // The 8-bit form reuses the 16-bit itinerary.
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Long 3-argument intrinsics that take an intrinsic (IntOp), an extend node
// (ExtOp) and a combining node (OpNode).  One def per source element size
// 8/16/32; all three share the single `itin` itinerary.
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Defs are named after the source type; the result type has half as many
// elements of twice the width (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// Left-shift flavour: N2RegVShLFrm with a plain i32imm immediate.  The element
// size is encoded by pinning the top bits of imm6 (Inst{21-16}); the 64-bit
// defs pin nothing and instead pass 1 (not 0) as the fourth class argument.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift flavour of N2VShL_QHSD: N2RegVShRFrm with the per-width
// shr_imm8/16/32/64 immediate operands instead of i32imm.
// NOTE: the `baseOpc` template argument is not referenced anywhere in this
// body; it is kept so that existing instantiations keep their argument list.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// Uses the shr_imm8/16/32/64 immediate operands; no format or itinerary is
// passed through from here.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// Left flavour: i32imm immediate, N2RegVShLFrm, pattern node NEONvsliImm.
// The data-type suffix is the bare element size ("8".."64"); there is no Dt
// prefix argument.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                             // imm6 = xxxxxx
}
// Right flavour of N2VShInsL_QHSD: shr_imm8/16/32/64 immediates,
// N2RegVShRFrm, pattern node NEONvsriImm.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Defs are named after the (wider) result type; the immediate operand class
// is imm1_7 / imm1_15 / imm1_31 for 8 / 16 / 32-bit sources.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7,
                     OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15,
                     OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31,
                     OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// The Dt suffix names the source element size (16/32/64) while the def name,
// result type, immediate operand (shr_imm8/16/32) and pinned imm6 bits all
// follow the narrowed result size.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// The f16 forms additionally require HasFullFP16.
def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                     v4f16, v4f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                     v8f16, v8f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
// VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zanyext, 1>;
// VADDW    : Vector Add Wide (Q = Q + D)
// Commutable is passed as 0 here (the VADDL forms above pass 1).
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
// VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", saddsat, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", uaddsat, 1>;
// VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
// Instantiated with null_frag; selection is done by the explicit patterns
// that follow.
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Select VADDHN from trunc(ARMvshruImm(add(Vn, Vm), N)), where N is half the
// source element width (8 for v8i16, 16 for v4i32, 32 for v2i64).
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Multiply Operations.

// VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                     v4f16, v4f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                     v8f16, v8f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;
def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                       v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;

let Predicates = [HasNEON] in {
// Multiply by a lane duplicated from a Q register: emit the "sl" instruction
// on the D sub-register chosen by DSubReg_*_reg, with SubReg_*_lane applied
// to the same lane immediate as the final operand.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq (v8f16 QPR:$src1),
                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i16_reg imm:$lane))),
                           (SubReg_i16_lane imm:$lane)))>;

// Multiply by a scalar register broadcast with ARMvdup: insert the scalar at
// ssub_0 of an IMPLICIT_DEF vector and use the "sl" instruction with lane 0.
def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhd DPR:$Rn,
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
            (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhq QPR:$Rn,
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
            (i32 0))>;
}

// VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;

// Q-register lane forms of vqdmulh (same sub-register/lane mapping as the
// vmul lane patterns).
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
}

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

// Q-register lane forms of vqrdmulh.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
}

// VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "s", ARMvmulls, 1>;
  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "u", ARMvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  // The p64 form has no itinerary and requires HasV8 and HasAES.
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                  Requires<[HasV8, HasAES]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;

// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
// The integer forms come from the VMLA / VMLAsl multiclasses. Each
// floating-point form is a separate def and is additionally gated on
// UseFPVMLx (plus HasFullFP16 for the f16 variants).
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                         IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx]>;
def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                       v4f16, fmul_su, fadd_mlx>,
             Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                       v8f16, fmul_su, fadd_mlx>,
             Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                           v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                           v4f32, v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
// NOTE(review): the f16 lane-indexed forms below use plain fmul/fadd, whereas
// every other floating-point VMLA form above uses fmul_su/fadd_mlx. Confirm
// the difference is intentional.
def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                             v4f16, fmul, fadd>,
               Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                             v8f16, v4f16, fmul, fadd>,
               Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Select the lane-indexed Q instruction when the multiplier is a lane splat
// (ARMvduplane) of a Q register. The instruction receives a 64-bit vector
// operand, so the source is narrowed with EXTRACT_SUBREG; the subregister
// index (DSubReg_*_reg) and lane operand (SubReg_*_lane) are both derived
// from the original lane immediate.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

// f32 counterpart of the two patterns above; gated on UseFPVMLx like the
// VMLAslfq instruction it selects.
def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "s", ARMvmulls, add>;
defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "u", ARMvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;

let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations,
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
  //            Returning High Half (D += D * D, Q += Q * Q)
  // The instructions are defined with null_frag; selection is done by the
  // explicit patterns below, which match saddsat(acc, vqrdmulh(n, m)).
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  def : Pat<(v4i16 (saddsat
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (saddsat
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                   (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (saddsat
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                   (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (saddsat
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                   (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  // Lane-indexed VQRDMLAH. The D-register patterns pass the lane immediate
  // through unchanged; the Q-register patterns split it into a D subregister
  // and a lane within it, as in the VMLA lane patterns.
  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  def : Pat<(v4i16 (saddsat
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh
                              (v4i16 DPR:$Vn),
                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                   imm:$lane)))))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (saddsat
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh
                              (v2i32 DPR:$Vn),
                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                   imm:$lane)))))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (saddsat
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh
                              (v8i16 QPR:$src2),
                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                   imm:$lane)))))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (saddsat
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh
                              (v4i32 QPR:$src2),
                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                   imm:$lane)))))),
            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;

  // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
  //            Returning High Half (D -= D * D, Q -= Q * Q)
  // Mirrors VQRDMLAH: defined with null_frag and selected by patterns that
  // match ssubsat(acc, vqrdmulh(n, m)).
  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                             null_frag>;
  def : Pat<(v4i16 (ssubsat
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (ssubsat
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                   (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (ssubsat
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                   (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (ssubsat
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                   (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  // Lane-indexed VQRDMLSH; same pattern layout as VQRDMLAHsl.
  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                                  null_frag>;
  def : Pat<(v4i16 (ssubsat
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh
                              (v4i16 DPR:$Vn),
                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                   imm:$lane)))))),
            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  def : Pat<(v2i32 (ssubsat
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh
                              (v2i32 DPR:$Vn),
                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                   imm:$lane)))))),
            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (ssubsat
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh
                              (v8i16 QPR:$src2),
                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                   imm:$lane)))))),
            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (ssubsat
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh
                              (v4i32 QPR:$src2),
                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                   imm:$lane)))))),
            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;
}
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Defined with null_frag; selected by the patterns below, which match
// saddsat(acc, vqdmull(n, m)) with either a plain or a lane-splat multiplier.
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                           "vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

let Predicates = [HasNEON] in {
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                       (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                       (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                   (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                        imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                   (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                        imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// VMLS : Vector Multiply Subtract (integer and floating-point)
// Same layout as VMLA, with sub / fsub_mlx in place of add / fadd_mlx.
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                         IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON, UseFPVMLx]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON, UseFPVMLx]>;
// NOTE(review): all f16 VMLS forms (VMLShd/VMLShq here, VMLSslhd/VMLSslhq
// below) use plain fmul/fsub, unlike the f32 forms, which use
// fmul_su/fsub_mlx. Confirm the difference is intentional.
def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
                       v4f16, fmul, fsub>,
             Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
                       v8f16, fmul, fsub>,
             Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                           v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                           v4f32, v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
                             v4f16, fmul, fsub>,
               Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
                             v8f16, v4f16, fmul, fsub>,
               Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Q-register lane-splat selection for VMLS; the lane immediate is split into
// a D subregister and a lane within it, as in the VMLA patterns.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

// f32 counterpart of the two patterns above; gated on UseFPVMLx like the
// VMLSslfq instruction it selects.
def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlsl", "u", ARMvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// Defined with null_frag; selected by the patterns below, which match
// ssubsat(acc, vqdmull(n, m)) with either a plain or a lane-splat multiplier.
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                           "vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

let Predicates = [HasNEON] in {
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                       (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                       (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                   (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                        imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                   (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                        imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// Fused multiply-accumulate. The f32 forms require HasVFP4 and the f16 forms
// require HasFullFP16; all are additionally gated on UseFusedMAC.
// NOTE(review): the f16 forms use plain fmul/fadd (fmul/fsub for VFMS) where
// the f32 forms use fmul_su/fadd_mlx (fsub_mlx). Confirm this is intentional.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
                       v4f16, fmul, fadd>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
                       v8f16, fmul, fadd>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
                       v4f16, fmul, fsub>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
                       v8f16, fmul, fsub>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
// The fma node's addend ($src1) is its last operand but the instruction's
// first, so the operands are reordered. These patterns do not require
// UseFusedMAC. A negated first multiplicand selects VFMS.
// NOTE(review): only f32 has fma(fneg ...) -> VFMS patterns in this section;
// none are visible here for VFMShd/VFMShq. Confirm whether that is intended.
def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;

// ARMv8.2a dot product instructions.
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
// encodings are the same and thus no further bit twiddling is necessary
// in the disassembler.
//
// VDOT defines one three-register dot-product instruction.
//   op6, op4, op23 : encoding bits forwarded to N3Vnp; op23 is appended to
//                    0b1100 to form its first argument.
//   RegTy          : register class of the result and of all three inputs.
//   Asm, AsmTy     : mnemonic and data-type suffix.
//   AccumTy        : value type of the accumulator input $Vd and result $dst.
//   InputTy        : value type of the multiplicands $Vn and $Vm.
//   OpNode         : operator matched by the selection pattern.
// The accumulator input is tied to the result ("$dst = $Vd").
class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
           string AsmTy, ValueType AccumTy, ValueType InputTy,
           SDPatternOperator OpNode> :
      N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
            Asm, AsmTy,
            [(set (AccumTy RegTy:$dst),
                  (OpNode (AccumTy RegTy:$Vd),
                          (InputTy RegTy:$Vn),
                          (InputTy RegTy:$Vm)))]> {
  let Predicates = [HasDotProd];
  let DecoderNamespace = "VFPV8";
  let Constraints = "$dst = $Vd";
}

def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;

// Indexed dot product instructions:
// DOTI defines a lane-indexed dot-product instruction (named after the defm
// itself) together with the pattern that selects it.
//   opc, dt   : mnemonic and data-type suffix.
//   Q, U      : encoding bits forwarded to N3Vnp.
//   Ty        : register class of $dst, $Vd and $Vn; the indexed operand $Vm
//               is always DPR_VFP2.
//   AccumType : value type of the accumulator and result.
//   InputType : value type of the multiplicands.
//   OpNode    : operator matched by the pattern.
//   RHS       : dag used as the $Vm operand of the selected instruction.
multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
                ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
                dag RHS> {
  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
    // The one-bit lane index is encoded in instruction bit 5.
    bit lane;
    let Inst{5} = lane;
    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
    let Constraints = "$dst = $Vd";
    let Predicates = [HasDotProd];
    let DecoderNamespace = "VFPV8";
  }

  // Match OpNode whose last operand is an AccumType lane splat (ARMvduplane)
  // bitconverted to InputType.
  def : Pat<
    (AccumType (OpNode (AccumType Ty:$Vd),
                       (InputType Ty:$Vn),
                       (InputType (bitconvert (AccumType
                                  (ARMvduplane (AccumType Ty:$Vm),
                                               VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
}

// The Q forms pass dsub_0 of the matched Q register as the indexed operand.
defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// v8.6A matrix multiplication extension
let Predicates = [HasMatMulInt8] in {
  // N3VMatMul defines a Q-register matrix multiply-accumulate instruction:
  // a v4i32 accumulator (tied to the result) and two v16i8 inputs.
  //   B, U       : encoding bits forwarded to N3Vnp; B is appended to 0b1100
  //                to form its first argument.
  //   Asm, AsmTy : mnemonic and data-type suffix.
  //   OpNode     : operator matched by the selection pattern.
  class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
                  SDPatternOperator OpNode>
        : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
                (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
                Asm, AsmTy,
                [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
                                                (v16i8 QPR:$Vn),
                                                (v16i8 QPR:$Vm)))]> {
    let DecoderNamespace = "VFPV8";
    let Constraints = "$dst = $Vd";
  }

  // N3VMixedDotLane defines a lane-indexed mixed-sign dot-product instruction
  // (named after the defm itself) plus its selection pattern. The parameters
  // have the same roles as in DOTI; the instruction body does not set
  // Predicates itself.
  multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy,
                             ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode,
                             dag RHS> {

    def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
                   (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm,
                   NoItinerary, Asm, AsmTy, []> {
      // The one-bit lane index is encoded in instruction bit 5.
      bit lane;
      let Inst{5} = lane;
      let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
      let DecoderNamespace = "VFPV8";
      let Constraints = "$dst = $Vd";
    }

    def : Pat<
      (AccumTy (OpNode (AccumTy RegTy:$Vd),
                       (InputTy RegTy:$Vn),
                       (InputTy (bitconvert (AccumTy
                                  (ARMvduplane (AccumTy RegTy:$Vm),
                                               VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;

  }

  // SUDOTLane instantiates N3VMixedDotLane for "vsudot" with null_frag as both
  // OpNode and RHS, then adds its own int_arm_neon_usdot pattern in which the
  // lane-splat operand comes second and $Vn third, i.e. swapped relative to
  // the base multiclass pattern.
  multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS>
        : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> {
    def : Pat<
      (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
                                   (InputTy (bitconvert (AccumTy
                                            (ARMvduplane (AccumTy RegTy:$Vm),
                                                         VectorIndex32:$lane)))),
                                   (InputTy RegTy:$Vn))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
  }

  def VSMMLA : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>;
  def VUMMLA : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>;
  def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
  // NOTE(review): VDOT's body sets Predicates = [HasDotProd]; the enclosing
  // let (Predicates = [HasMatMulInt8]) is presumably what takes effect for
  // these two records. Confirm.
  def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8, int_arm_neon_usdot>;
  def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>;

  defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
                                  int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
  defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
                                  int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
  defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
}

// ARMv8.3 complex operations

// Three-register complex operation with a tied accumulator ("$src1 = $Vd")
// and a 2-bit rotation operand encoded in Inst{24-23}.
class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
                            InstrItinClass itin, dag oops, dag iops,
                            string opc, string dt, list<dag> pattern>
  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
  bits<2> rot;
  let Inst{24-23} = rot;
}

// Three-register complex operation with no tied operand and a 1-bit rotation
// operand encoded in Inst{24}.
class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
                           InstrItinClass itin, dag oops, dag iops, string opc,
                           string dt, list<dag> pattern>
  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
  bits<1> rot;
  let Inst{24} = rot;
}

// Lane-indexed complex operation with a tied accumulator. The 2-bit rotation
// is encoded in Inst{21-20} and the 1-bit lane index in Inst{5}.
class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = lane;
}

// Same as BaseN3VCP8ComplexTiedLane32, except that Inst{5} carries Vm{4}
// instead of the lane index, and a custom decoder method is used.
class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = Vm{4};
  // This is needed because the lane operand does not have any bits in the
  // encoding (it only has one possible value), so we need to manually set it
  // to its default value.
  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
}

// Defines the v4f16/v8f16/v2f32/v4f32 variants of a tied complex operation.
// The f16 variants additionally require HasFullFP16. Every variant is given
// an empty pattern list; Op is not referenced by any definition here.
multiclass N3VCP8ComplexTied<bit op21, bit op4,
                             string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  }
}

// Defines the v4f16/v8f16/v2f32/v4f32 variants of an untied complex operation
// whose rotation operand is complexrotateopodd. Every variant is given an
// empty pattern list; Op is not referenced by any definition here.
multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
                            string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  }
}

// These instructions index by pairs of lanes, so the VectorIndexes are twice
// as wide as the data types.
// The f16 variants use the Lane32 base class with a DPR_VFP2 indexed operand;
// the f32 variants use the Lane64 base class with a DPR indexed operand.
// Every variant is given an empty pattern list; Op is not referenced by any
// definition here.
multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
                                 SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  }
}

// VCMLA is instantiated twice under the same prefix: once for the plain
// three-register forms and once for the lane-indexed (*_indexed) forms.
defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;

// Select VCADD from the vcadd intrinsics. The rotation operand is 0 for the
// rot90 intrinsic and 1 for the rot270 intrinsic.
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
}
let Predicates = [HasNEON,HasV8_3a] in {
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
}

// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                     "vsub", "i", sub, 0>;
def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                  v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                  v4f32, v4f32, fsub, 0>;
def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
                  v4f16, v4f16, fsub, 0>,
             Requires<[HasNEON,HasFullFP16]>;
def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
                  v8f16, v8f16, fsub, 0>,
             Requires<[HasNEON,HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "u", sub, zanyext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "s", ssubsat, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "u", usubsat, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// Defined with null_frag; selected by the trunc/shift/sub patterns below.
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// Select VSUBHN for a truncated ARMvshruImm of a Q-register subtraction,
// where the shift amount (8/16/32) is half the source element width.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Comparisons.

// VCEQ : Vector Compare Equal
// The f16 forms additionally require HasFullFP16.
defm VCEQ     : N3V_QHS_cmp<1, 0, 0b1000, 1,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vceq", "i", ARMCCeq, 1>;
def  VCEQfd   : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32",
                         v2i32, v2f32, ARMCCeq, 1>;
def  VCEQfq   : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32",
                         v4i32, v4f32, ARMCCeq, 1>;
def  VCEQhd   : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16",
                         v4i16, v4f16, ARMCCeq, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCEQhq   : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16",
                         v8i16, v8f16, ARMCCeq, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// Compare-against-#0 form of VCEQ.
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", ARMCCeq>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS_cmp<0, 0, 0b0011, 1,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcge", "s", ARMCCge, 0>;
defm VCGEu    : N3V_QHS_cmp<1, 0, 0b0011, 1,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcge", "u", ARMCChs, 0>;
def  VCGEfd   : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
                         v2i32, v2f32, ARMCCge, 0>;
def  VCGEfq   : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32",
                         v4i32, v4f32, ARMCCge, 0>;
def  VCGEhd   : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16",
                         v4i16, v4f16, ARMCCge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCGEhq   : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16",
                         v8i16, v8f16, ARMCCge, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Compare-against-#0 forms: VCGE and VCLE.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", ARMCCge>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", ARMCCle>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs    : N3V_QHS_cmp<0, 0, 0b0011, 0,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcgt", "s", ARMCCgt, 0>;
defm VCGTu    : N3V_QHS_cmp<1, 0, 0b0011, 0,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vcgt", "u", ARMCChi, 0>;
def  VCGTfd   : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32",
                         v2i32, v2f32, ARMCCgt, 0>;
def  VCGTfq   : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32",
                         v4i32, v4f32, ARMCCgt, 0>;
def  VCGThd   : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16",
                         v4i16, v4f16, ARMCCgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCGThq   : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16",
                         v8i16, v8f16, ARMCCgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Compare-against-#0 forms: VCGT and VCLT.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", ARMCCgt>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", ARMCClt>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEfd  : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacge", "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def  VACGEfq  : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacge", "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
def  VACGEhd  : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacge", "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VACGEhq  : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacge", "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTfd  : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def  VACGTfq  : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def  VACGThd  : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacgt", "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VACGThq  : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacgt", "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VTST : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vtst", "", NEONvtst, 1>;

// Three-operand VACLT/VACLE assembler aliases. They map onto VACGT/VACGE
// with the $Vn and $Vm source operands exchanged.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// +fp16fml Floating Point Multiplication Variants
let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

// Register-form helper built on N3VCP8 (third template argument fixed to 1).
class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
           asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Register-form helper built on N3VCP8Q0 (third template argument fixed to 0).
class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
             asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Vd, Vs, Vs[0-15], Idx[0-1]
class VFMD<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bit idx;
  let Inst{3}     = idx;
  let Inst{19-16} = Vn{4-1};
  let Inst{7}     = Vn{0};
  let Inst{5}     = Vm{0};
  let Inst{2-0}   = Vm{3-1};
}

// Vq, Vd, Vd[0-7], Idx[0-3]
class VFMQ<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
}

//                                                op1   op2   op3
def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
} // HasNEON, HasFP16FML


// Two-operand VACLT/VACLE assembler aliases: $Vd is reused as the last
// source operand of the underlying VACGT/VACGE instruction.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
}

// Vector Bitwise Operations.

// Bitwise NOT expressed as XOR with an all-ones vector (D and Q flavours).
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesD)>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesV)>;


// VAND : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// Selection patterns for the vector types that the bitwise instruction
// definitions do not name themselves (those use v2i32 / v4i32). Name is the
// instruction prefix; the "d" / "q" suffix picks the D- or Q-register form.
multiclass BitwisePatterns<string Name, SDPatternOperator OpNodeD,
                           SDPatternOperator OpNodeQ> {
  def : Pat<(v8i8 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
  def : Pat<(v4i16 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;
  def : Pat<(v1i64 (OpNodeD DPR:$LHS, DPR:$RHS)),
            (!cast<Instruction>(Name#"d") DPR:$LHS, DPR:$RHS)>;

  def : Pat<(v16i8 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  def : Pat<(v8i16 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
  def : Pat<(v2i64 (OpNodeQ QPR:$LHS, QPR:$RHS)),
            (!cast<Instruction>(Name#"q") QPR:$LHS, QPR:$RHS)>;
}

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VAND", and, and>;
  defm : BitwisePatterns<"VORR", or, or>;
  defm : BitwisePatterns<"VEOR", xor, xor>;
}

// VORR (immediate): $src is tied to $Vd.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
}

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VBIC", BinOpFrag<(and node:$LHS, (vnotd node:$RHS))>,
                                 BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>>;
}

// VBIC (immediate): $src is tied to $Vd.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

let Predicates = [HasNEON] in {
  defm : BitwisePatterns<"VORN", BinOpFrag<(or node:$LHS, (vnotd node:$RHS))>,
                                 BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>>;
}

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// The remaining integer vector types select the same two instructions.
let Predicates = [HasNEON] in {
def : Pat<(v1i64 (vnotd DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v4i16 (vnotd DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v8i8 (vnotd DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v2i64 (vnotq QPR:$src)),
          (VMVNq QPR:$src)>;
def : Pat<(v8i16 (vnotq QPR:$src)),
          (VMVNq QPR:$src)>;
def : Pat<(v16i8 (vnotq QPR:$src)),
          (VMVNq QPR:$src)>;
}

// The TwoAddress pass will not go looking for equivalent operations
// with different register constraints; it just inserts copies.
// That is why the pseudo VBSP is implemented. It is expanded later into
// VBIT/VBIF/VBSL, taking into account register constraints to avoid copies.
def  VBSPd
     : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   IIC_VBINiD, "",
                   [(set DPR:$Vd,
                         (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
// The vbsl intrinsic selects the pseudo for every D-register vector type.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

// Open-coded select: (Vn & Vd) | (Vm & ~Vd).
def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd),
                    (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}

def  VBSPq
     : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   IIC_VBINiQ, "",
                   [(set QPR:$Vd,
                         (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
// The vbsl intrinsic selects the pseudo for every Q-register vector type.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

// Open-coded select: (Vn & Vd) | (Vm & ~Vd).
def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}

// VBSL : Vector Bitwise Select
// No selection pattern is attached here; $src1 is tied to $Vd.
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIF : Vector Bitwise Insert if False
// Declared with the same operand list and "$src1 = $Vd" tie as VBSL.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT : Vector Bitwise Insert if True
// Declared with the same operand list and "$src1 = $Vd" tie as VBSL.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// abs(zext(a) - zext(b)) selects the unsigned long form directly.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

def abd_shr :
    PatFrag<(ops node:$in1, node:$in2, node:$shift),
            (ARMvshrsImm (sub (zext node:$in1),
                              (zext node:$in2)), (i32 $shift))>;

let Predicates = [HasNEON] in {
def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
               (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                (zext (v2i32 DPR:$opB))),
                           (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
}

// VABA : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
// Integer forms select smax/umax; floating-point forms select fmaximum.
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", smax, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", umax, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, fmaximum, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, fmaximum, 1>;
def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f16",
                        v4f16, v4f16, fmaximum, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f16",
                        v8f16, v8f16, fmaximum, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
// Gated on HasV8; selects fmaxnum.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                  v2f32, v2f32, fmaxnum, 1>,
                        Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                  v4f32, v4f32, fmaxnum, 1>,
                        Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                  v4f16, v4f16, fmaxnum, 1>,
                        Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                  v8f16, v8f16, fmaxnum, 1>,
                        Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN : Vector Minimum
// Integer forms select smin/umin; floating-point forms select fminimum.
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", smin, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", umin, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, fminimum, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, fminimum, 1>;
def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f16",
                        v4f16, v4f16, fminimum, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f16",
                        v8f16, v8f16, fminimum, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VMINNM
// Gated on HasV8; selects fminnum.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                  N3RegFrm, NoItinerary, "vminnm", "f32",
                                  v2f32, v2f32, fminnum, 1>,
                        Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                  N3RegFrm, NoItinerary, "vminnm", "f32",
                                  v4f32, v4f32, fminnum, 1>,
                        Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                  N3RegFrm, NoItinerary, "vminnm", "f16",
                                  v4f16, v4f16, fminnum, 1>,
                        Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                  N3RegFrm, NoItinerary, "vminnm", "f16",
                                  v8f16, v8f16, fminnum, 1>,
                        Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
// Only D-register forms are defined here; the f16 form needs HasFullFP16.
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f16",
                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmax", "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmin", "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
// u32, f32 and f16 forms; the f16 forms additionally require HasFullFP16.
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;
def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f16",
                        v4f16, v4f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f16",
                        v8f16, v8f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f16",
                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f16",
                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.
// VSHL : Vector Shift
// Register-shift forms, signed and unsigned, for all D/Q integer element sizes.
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

// Select the register-shift VSHL instructions for the ARMvshls / ARMvshlu
// nodes on every 64-bit and 128-bit integer vector type.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                         ARMvshrsImm>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                         ARMvshruImm>;

// VSHLL : Vector Shift Left Long
// The operand is sign- or zero-extended first and then shifted by an
// immediate.
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
                         PatFrag<(ops node:$LHS, node:$RHS),
                                 (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
                         PatFrag<(ops node:$LHS, node:$RHS),
                                 (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// Built on N2VLSh with no selection fragment (null_frag); Inst{21-16} is fixed
// by the op21_16 argument and decoding goes through a dedicated method.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                         v8i16, v8i8, imm8>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                         v4i32, v4i16, imm16>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                         v2i64, v2i32, imm32>;

// A shift by exactly the source element width selects the maximum-shift form
// for zext, sext and anyext operands alike.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD, "vshrn", "i",
                        PatFrag<(ops node:$Rn, node:$amt),
                                (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

// The defm above matches the signed shift; the unsigned shift followed by a
// truncate selects the same instructions.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                          NEONvrshrsImm>;
defm VRSHRu : N2VShR_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                          NEONvrshruImm>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                         NEONvrshrnImm>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s",
                           NEONvqshlsImm>;
defm VQSHLui : N2VShL_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u",
                           NEONvqshluImm>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s",
                           NEONvqshlsuImm>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrnsImm>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnuImm>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsuImm>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnuImm>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsuImm>;

// VSRA : Vector Shift Right and Accumulate
// Shift-right-and-accumulate, signed and unsigned.
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
// Integer forms select `abs`; the f32/f16 forms select `fabs`. The f16 forms
// additionally require FullFP16.
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                       IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                  "vabs", "f32",
                  v2f32, v2f32, fabs>;
def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                  "vabs", "f32",
                  v4f32, v4f32, fabs>;
def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                  "vabs", "f16",
                  v4f16, v4f16, fabs>,
             Requires<[HasNEON, HasFullFP16]>;
def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                  "vabs", "f16",
                  v8f16, v8f16, fabs>,
             Requires<[HasNEON, HasFullFP16]>;

// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                        int_arm_neon_vqabs>;

// Vector Negate.
// Integer negation is matched as a subtraction from an all-zeros vector
// (D-register and Q-register flavours).
def vnegd : PatFrag<(ops node:$in),
                    (sub ARMimmAllZerosD, node:$in)>;
def vnegq : PatFrag<(ops node:$in),
                    (sub ARMimmAllZerosV, node:$in)>;

// Integer VNEG on a D register; `size` selects the element size field.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
// Integer VNEG on a Q register; `size` selects the element size field.
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
// The f16 forms additionally require FullFP16.
def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                 "vneg", "f32", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                   "vneg", "f32", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                 "vneg", "f16", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
             Requires<[HasNEON, HasFullFP16]>;
def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                 "vneg", "f16", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
             Requires<[HasNEON, HasFullFP16]>;

// Explicit per-type selection patterns for the integer negate fragments.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
}

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                       ctlz>;
// VCNT : Vector Count One Bits
// Defined for 8-bit elements only (v8i8 / v16i8).
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiD, "vcnt", "8",
                    v8i8, v8i8, ctpop>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiQ, "vcnt", "8",
                    v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are inputs and outputs: each output is tied to the matching
// input. No selection pattern is attached.
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                 []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                 []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
// Assembler aliases only: a register-to-register vmov is a VORR of the source
// with itself.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Although VMOVs are not, strictly speaking, cheap, they are as expensive as
// their copy counterpart (VORR), so we should prefer rematerialization over
// splitting when it applies.
let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                        (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                        "vmov", "i8", "$Vd, $SIMM", "",
                        [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;

// For i16, one cmode bit (Inst{9}) comes from the immediate operand.
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// For i32, the whole cmode field (Inst{11-8}) comes from the immediate operand.
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Add support for the byte replication feature, for GAS compatibility.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. the instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are incorrect, but we can deal with such cases.
  // For the last two instructions, for example, it should emit:
  // "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm,
                       pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm,
                       pred:$p)>;
  // Also add the same support for VMVN instructions. So the instruction:
  // "vmvn.i32 d0, #0xabababab"
  // actually means:
  // "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm,
                       pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm,
                       pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// Similar to the above, for types other than i8, e.g.:
// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In this case we do not canonicalize VMVN to VMOV.
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                 VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute, so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)

// The lane index is scattered over Inst{21} and Inst{6-5} (8-bit elements),
// Inst{21} and Inst{6} (16-bit elements) or Inst{21} alone (32-bit elements).
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                        imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                         imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                        imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                         imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                         imm:$lane))]>,
                Requires<[HasFPRegs, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// Q-register sources: extract the D sub-register holding the lane, then read
// the lane from it with the D-register instruction.
let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
}
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// With HasSlowVGETLNi32, a 32-bit lane is read as an S sub-register copied
// into a GPR instead of using VGETLNi32.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
let Predicates = [HasNEON] in {
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
}

// Even 16-bit FP lanes are read directly as a sub-register of the vector.
multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
}

// Odd 16-bit FP lanes go through VMOVH on the containing sub-register, and the
// result is copied to HPR.
multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
}

let Predicates = [HasNEON] in {
  defm : ExtractEltEvenF16<v4f16, v8f16>;
  defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
}

let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
  // If VMOVH (vmovx.f16) is available use it to extract BF16 from the odd lanes
  defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
}

let Predicates = [HasBF16, HasNEON] in {
  defm : ExtractEltEvenF16<v4bf16, v8bf16>;

  // Otherwise, if VMOVH is not available resort to extracting the odd lane
  // into a GPR and then moving to HPR
  def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
              HPR)>;

  def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                 (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane)),
              HPR)>;
}

// VMOV : Vector Set Lane (move ARM core register to scalar)

// The destination is tied to $src1, so the untouched lanes are preserved.
let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                        GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                         GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                         GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent to
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}

// TODO: for odd lanes we could optimize this a bit by using the VINS
// FullFP16 instruction when it is available
// Inserts a 16-bit FP scalar by copying it to a GPR and using VSETLNi16; the
// Q-register form operates on the D sub-register that contains the lane.
multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
  def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT4 (VSETLNi16 DPR:$src1,
                  (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
  def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT8 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                                    (COPY_TO_REGCLASS HPR:$src2, GPR),
                                    (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
}

let Predicates = [HasNEON] in {
// Q-register integer inserts: update the D sub-register containing the lane
// and put it back into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

defm : InsertEltF16<f16, v4f16, v8f16>;

def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: place the scalar in lane 0 of an otherwise undefined
// vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
}

let Predicates = [HasNEON, HasBF16] in
defm : InsertEltF16<bf16, v4bf16, v8bf16>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
              Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// ARMvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
      Requires<[HasNEON]>;

// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

// NOTE(review): the selection pattern below names VectorIndex32:$lane for every
// element size, while the instruction operand uses IdxTy - confirm this is
// intended.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                     VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
// VDUP (scalar) instruction definitions. The fixed low bits of the op19_16
// argument identify the element size; the remaining high bits of Inst{19-16}
// carry the lane index (3 bits for 8-bit, 2 bits for 16-bit, 1 bit for 32-bit
// elements).
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

let Predicates = [HasNEON] in {
// A v4f16 lane is 16 bits wide and its index ranges over four lanes, so it must
// use the 16-bit dup. VDUPLN32d encodes only a 1-bit lane and would replicate a
// pair of half-precision elements instead of one.
def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;

def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Q-register sources: the instruction reads its scalar from a D register, so
// extract the D sub-register that holds the lane and renumber the lane within
// it.
def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Duplicating an FP scalar register: insert it into lane 0 of an undefined
// vector and duplicate lane 0.
def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             (f16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             (f16 HPR:$src), ssub_0), (i32 0)))>;
}

// The same lane-duplicate and scalar-duplicate selections for bfloat16 vectors.
let Predicates = [HasNEON, HasBF16] in {
def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;

def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
          (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                      (DSubReg_i16_reg imm:$lane))),
                             (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
          (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                              (bf16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
          (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                              (bf16 HPR:$src), ssub_0), (i32 0)))>;
}

// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                      "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                           "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                           "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector
Lengthening Move 6783defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; 6784defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; 6785 6786let Predicates = [HasNEON] in { 6787def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; 6788def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; 6789def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; 6790} 6791 6792// Vector Conversions. 6793 6794// VCVT : Vector Convert Between Floating-Point and Integers 6795def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6796 v2i32, v2f32, fp_to_sint>; 6797def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6798 v2i32, v2f32, fp_to_uint>; 6799def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6800 v2f32, v2i32, sint_to_fp>; 6801def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6802 v2f32, v2i32, uint_to_fp>; 6803 6804def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6805 v4i32, v4f32, fp_to_sint>; 6806def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6807 v4i32, v4f32, fp_to_uint>; 6808def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6809 v4f32, v4i32, sint_to_fp>; 6810def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6811 v4f32, v4i32, uint_to_fp>; 6812 6813def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6814 v4i16, v4f16, fp_to_sint>, 6815 Requires<[HasNEON, HasFullFP16]>; 6816def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6817 v4i16, v4f16, fp_to_uint>, 6818 Requires<[HasNEON, HasFullFP16]>; 6819def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6820 v4f16, v4i16, sint_to_fp>, 6821 Requires<[HasNEON, HasFullFP16]>; 6822def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6823 v4f16, v4i16, uint_to_fp>, 6824 Requires<[HasNEON, 
HasFullFP16]>; 6825 6826def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6827 v8i16, v8f16, fp_to_sint>, 6828 Requires<[HasNEON, HasFullFP16]>; 6829def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6830 v8i16, v8f16, fp_to_uint>, 6831 Requires<[HasNEON, HasFullFP16]>; 6832def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6833 v8f16, v8i16, sint_to_fp>, 6834 Requires<[HasNEON, HasFullFP16]>; 6835def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6836 v8f16, v8i16, uint_to_fp>, 6837 Requires<[HasNEON, HasFullFP16]>; 6838 6839// VCVT{A, N, P, M} 6840multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, 6841 SDPatternOperator IntU> { 6842 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 6843 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6844 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; 6845 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6846 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; 6847 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6848 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; 6849 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6850 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; 6851 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6852 "s16.f16", v4i16, v4f16, IntS>, 6853 Requires<[HasV8, HasNEON, HasFullFP16]>; 6854 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6855 "s16.f16", v8i16, v8f16, IntS>, 6856 Requires<[HasV8, HasNEON, HasFullFP16]>; 6857 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6858 "u16.f16", v4i16, v4f16, IntU>, 6859 Requires<[HasV8, HasNEON, HasFullFP16]>; 6860 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, 
NoItinerary, !strconcat("vcvt", op), 6861 "u16.f16", v8i16, v8f16, IntU>, 6862 Requires<[HasV8, HasNEON, HasFullFP16]>; 6863 } 6864} 6865 6866defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; 6867defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; 6868defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; 6869defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; 6870 6871// VCVT : Vector Convert Between Floating-Point and Fixed-Point. 6872let DecoderMethod = "DecodeVCVTD" in { 6873def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6874 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; 6875def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6876 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; 6877def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6878 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; 6879def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 6880 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; 6881let Predicates = [HasNEON, HasFullFP16] in { 6882def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6883 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>; 6884def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6885 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>; 6886def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6887 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>; 6888def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6889 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>; 6890} // Predicates = [HasNEON, HasFullFP16] 6891} 6892 6893let DecoderMethod = "DecodeVCVTQ" in { 6894def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6895 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; 6896def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6897 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; 6898def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6899 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; 6900def VCVTxu2fq : N2VCvtQ<1, 1, 
0b1110, 0, 1, "vcvt", "f32.u32", 6901 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; 6902let Predicates = [HasNEON, HasFullFP16] in { 6903def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6904 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>; 6905def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6906 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>; 6907def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6908 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>; 6909def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6910 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>; 6911} // Predicates = [HasNEON, HasFullFP16] 6912} 6913 6914def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", 6915 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6916def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", 6917 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6918def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", 6919 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6920def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", 6921 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6922 6923def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", 6924 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6925def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", 6926 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6927def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", 6928 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6929def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", 6930 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6931 6932def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0", 6933 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6934def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0", 6935 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6936def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0", 6937 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6938def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0", 6939 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6940 6941def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0", 6942 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6943def : 
NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", 6944 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6945def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0", 6946 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6947def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0", 6948 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6949 6950 6951// VCVT : Vector Convert Between Half-Precision and Single-Precision. 6952def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 6953 IIC_VUNAQ, "vcvt", "f16.f32", 6954 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 6955 Requires<[HasNEON, HasFP16]>; 6956def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 6957 IIC_VUNAQ, "vcvt", "f32.f16", 6958 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 6959 Requires<[HasNEON, HasFP16]>; 6960 6961// Vector Reverse. 6962 6963// VREV64 : Vector Reverse elements within 64-bit doublewords 6964 6965class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6966 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), 6967 (ins DPR:$Vm), IIC_VMOVD, 6968 OpcodeStr, Dt, "$Vd, $Vm", "", 6969 [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>; 6970class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6971 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), 6972 (ins QPR:$Vm), IIC_VMOVQ, 6973 OpcodeStr, Dt, "$Vd, $Vm", "", 6974 [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>; 6975 6976def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; 6977def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; 6978def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; 6979let Predicates = [HasNEON] in { 6980def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; 6981} 6982 6983def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; 6984def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; 6985def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; 6986 6987let Predicates = [HasNEON] in { 6988 def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), 6989 (VREV64q32 QPR:$Vm)>; 6990 def : Pat<(v8f16 (ARMvrev64 
(v8f16 QPR:$Vm))), 6991 (VREV64q16 QPR:$Vm)>; 6992 def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), 6993 (VREV64d16 DPR:$Vm)>; 6994} 6995 6996// VREV32 : Vector Reverse elements within 32-bit words 6997 6998class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6999 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), 7000 (ins DPR:$Vm), IIC_VMOVD, 7001 OpcodeStr, Dt, "$Vd, $Vm", "", 7002 [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>; 7003class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 7004 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), 7005 (ins QPR:$Vm), IIC_VMOVQ, 7006 OpcodeStr, Dt, "$Vd, $Vm", "", 7007 [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>; 7008 7009def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; 7010def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; 7011 7012def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; 7013def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; 7014 7015let Predicates = [HasNEON] in { 7016 def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))), 7017 (VREV32q16 QPR:$Vm)>; 7018 def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))), 7019 (VREV32d16 DPR:$Vm)>; 7020} 7021 7022// VREV16 : Vector Reverse elements within 16-bit halfwords 7023 7024class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 7025 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd), 7026 (ins DPR:$Vm), IIC_VMOVD, 7027 OpcodeStr, Dt, "$Vd, $Vm", "", 7028 [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>; 7029class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 7030 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd), 7031 (ins QPR:$Vm), IIC_VMOVQ, 7032 OpcodeStr, Dt, "$Vd, $Vm", "", 7033 [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>; 7034 7035def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; 7036def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; 7037 7038// Other Vector Shuffles. 
//  Aligned extractions: really just dropping registers

// Selects an extract of a D-sized subvector (DestTy) from a Q register
// (SrcTy) as a plain EXTRACT_SUBREG.  LaneCVT converts the starting element
// index into the subregister index operand.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
        Requires<[HasNEON]>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;

def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16

//   VEXT     : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// D-register VEXT.  The class defaults place a 3-bit index in Inst{10-8} with
// Inst{11} cleared; the wider-element defs below override the low bits of
// that field with zeros.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// Q-register VEXT with a 4-bit index in Inst{11-8}.
// NOTE(review): the immTy parameter is not used here -- the index operand is
// always declared as imm0_15, whatever operand type the def passes in.
// Confirm whether that is intentional.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// For element sizes wider than 8 bits the element index occupies the upper
// bits of the index field and the remaining low bits are fixed to zero.
def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
// v4f16 shares the 16-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}     = index{0};
  let Inst{9-8}    = 0b00;
}
// v2f32 shares the 32-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
// v8f16 shares the 16-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}    = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8}    = 0b000;
}
// v4f32 shares the 32-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

//   VTRN     : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

// Only 8- and 16-bit D-register forms are defined as instructions; the
// 32-bit D-register form is provided as an alias (see comment below).
def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
// VTBL1..VTBL4 take a table of one to four D registers ($Vn) and an index
// vector ($Vm).  Only VTBL1 carries a selection pattern; the others are
// selected through the explicit patterns and pseudos further down.
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos for the three- and four-register tables; both take the table as a
// single QQPR operand.
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

//   VTBX     : Vector Table Extension
// Same operands as VTBL plus $orig, which is tied to the destination
// ("$orig = $Vd").
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// Selection patterns for the multi-register table forms.  The separate table
// operands are combined with REG_SEQUENCE: a DPair for the two-register
// forms, a QQPR for the three- and four-register pseudos.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

// Three-register tables fill the unused fourth slot (dsub_3) of the QQPR
// with IMPLICIT_DEF.
def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBX3Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
}

// VRINT      : Vector Rounding
// Emits D/Q forms for f32 and f16 plus assembler aliases that accept the
// two-type spelling (".f32.f32" / ".f16.f16").
//   op    - mnemonic suffix appended to "vrint".
//   op9_7 - value written to Inst{9-7} in every def.
//   Int   - operator matched by the instruction pattern.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f16",
                      v4f16, v4f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f16",
                      v8f16, v8f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                  (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                  (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
                  (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
                  (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
// Helper classes shared by the AES and SHA defs below.  Note that the AES,
// AES2Op, N2SHA and N2SHA2Op classes take (op7, op6) but forward them to the
// base class in (op6, op7) order.
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>;
}

let Predicates = [HasV8, HasAES] in {
def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
}

// SHA1H, SHA1C, SHA1M and SHA1P are defined with null_frag; their intrinsics
// take an i32 operand and are selected by the explicit patterns that follow.
let Predicates = [HasV8, HasSHA2] in {
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
}

// The i32 operand is copied into an SPR and placed in ssub_0 of the vector
// operand with SUBREG_TO_REG.  For sha1h the result is read back from ssub_0
// and copied to a GPR.
// NOTE(review): these patterns are guarded by HasNEON only, while the
// instructions they select require [HasV8, HasSHA2] -- confirm the
// instruction predicates are what gate selection here.
let Predicates = [HasNEON] in {
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;
}

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// The pattern classes below select a scalar FP operation onto a D-register
// NEON instruction: each scalar operand is inserted into ssub_0 of an
// undefined vector, the vector instruction is applied, and ssub_0 of the
// result is extracted.  Operands and result are constrained to DPR_VFP2
// (presumably the D registers that have S subregisters -- confirm against
// the register class definition).

// Unary f32 operation.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

// Binary f32 operation.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Binary f16 operation; same shape as N3VSPat but on HPR operands and v4f16.
class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
               (v4f16 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Multiply-accumulate: matches OpNode(acc, MulNode(a, b)) and passes the
// three scalars to Inst in the order acc, a, b.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Integer -> f32 conversion: the GPR value is first copied into an SPR.
// Unlike the classes above, no DPR_VFP2 constraint is applied here.
class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;
// f32 -> integer conversion: the i32 result is taken from ssub_0 of the
// instruction's output.
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

// Instantiations.  Entries with an explicit Requires<> state their own
// predicate list; the others use whatever NEONFPPat provides.
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
// Signed i32 lane -> f64.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
// Unsigned i32 lane -> f64.
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
let Predicates = [HasNEON, DontUseVMOVSR] in {
  def : Pat<(f32 (bitconvert GPR:$a)),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>;
  def : Pat<(arm_vmovsr GPR:$a),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>;
}

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns or Endianness - Revert Patterns
//===----------------------------------------------------------------------===//

// bit_convert

// Conversions between types with equally sized lanes: the register is reused
// unchanged, independent of endianness.
let Predicates = [HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

  foreach HalfVT = [v4f16, v4bf16] in {
    def : Pat<(v4i16  (bitconvert (HalfVT DPR:$src))), (v4i16  DPR:$src)>;
    def : Pat<(HalfVT (bitconvert (v4i16  DPR:$src))), (HalfVT DPR:$src)>;
  }

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;

  foreach HalfVT = [v8f16, v8bf16] in {
    def : Pat<(v8i16  (bitconvert (HalfVT QPR:$src))), (v8i16  QPR:$src)>;
    def : Pat<(HalfVT (bitconvert (v8i16  QPR:$src))), (HalfVT QPR:$src)>;
  }
}

// Little endian: conversions between types with differently sized lanes also
// reuse the register unchanged.  Every (Wide, Narrow) pair below is emitted in
// both directions.
let Predicates = [IsLE,HasNEON] in {
  // 64 bit conversions
  foreach Wide = [f64, v1i64] in
    foreach Narrow = [v2f32, v2i32, v4f16, v4bf16, v4i16, v8i8] in {
      def : Pat<(Wide   (bitconvert (Narrow DPR:$src))), (Wide   DPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   DPR:$src))), (Narrow DPR:$src)>;
    }
  foreach Wide = [v2f32, v2i32] in
    foreach Narrow = [v4f16, v4bf16, v4i16, v8i8] in {
      def : Pat<(Wide   (bitconvert (Narrow DPR:$src))), (Wide   DPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   DPR:$src))), (Narrow DPR:$src)>;
    }
  foreach Wide = [v4f16, v4bf16, v4i16] in {
    def : Pat<(Wide (bitconvert (v8i8 DPR:$src))), (Wide DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (Wide DPR:$src))), (v8i8 DPR:$src)>;
  }

  // 128 bit conversions
  foreach Wide = [v2f64, v2i64] in
    foreach Narrow = [v4f32, v4i32, v8f16, v8bf16, v8i16, v16i8] in {
      def : Pat<(Wide   (bitconvert (Narrow QPR:$src))), (Wide   QPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   QPR:$src))), (Narrow QPR:$src)>;
    }
  foreach Wide = [v4f32, v4i32] in
    foreach Narrow = [v8f16, v8bf16, v8i16, v16i8] in {
      def : Pat<(Wide   (bitconvert (Narrow QPR:$src))), (Wide   QPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   QPR:$src))), (Narrow QPR:$src)>;
    }
  foreach Wide = [v8f16, v8bf16, v8i16] in {
    def : Pat<(Wide  (bitconvert (v16i8 QPR:$src))), (Wide  QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (Wide  QPR:$src))), (v16i8 QPR:$src)>;
  }
}

// Big endian: conversions between types with differently sized lanes go
// through VREV<W>.<N>, where W is the wider and N the narrower of the two lane
// widths.  The same instruction is used for both directions of a pair.
let Predicates = [IsBE,HasNEON] in {
  // 64 bit conversions
  foreach Wide = [f64, v1i64] in {
    foreach Narrow = [v2f32, v2i32] in {
      def : Pat<(Wide   (bitconvert (Narrow DPR:$src))), (VREV64d32 DPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   DPR:$src))), (VREV64d32 DPR:$src)>;
    }
    foreach Narrow = [v4f16, v4bf16, v4i16] in {
      def : Pat<(Wide   (bitconvert (Narrow DPR:$src))), (VREV64d16 DPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   DPR:$src))), (VREV64d16 DPR:$src)>;
    }
    def : Pat<(Wide (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (Wide DPR:$src))), (VREV64d8 DPR:$src)>;
  }
  foreach Wide = [v2f32, v2i32] in {
    foreach Narrow = [v4f16, v4bf16, v4i16] in {
      def : Pat<(Wide   (bitconvert (Narrow DPR:$src))), (VREV32d16 DPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   DPR:$src))), (VREV32d16 DPR:$src)>;
    }
    def : Pat<(Wide (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (Wide DPR:$src))), (VREV32d8 DPR:$src)>;
  }
  foreach Wide = [v4f16, v4bf16, v4i16] in {
    def : Pat<(Wide (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
    def : Pat<(v8i8 (bitconvert (Wide DPR:$src))), (VREV16d8 DPR:$src)>;
  }

  // 128 bit conversions
  foreach Wide = [v2f64, v2i64] in {
    foreach Narrow = [v4f32, v4i32] in {
      def : Pat<(Wide   (bitconvert (Narrow QPR:$src))), (VREV64q32 QPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   QPR:$src))), (VREV64q32 QPR:$src)>;
    }
    foreach Narrow = [v8f16, v8bf16, v8i16] in {
      def : Pat<(Wide   (bitconvert (Narrow QPR:$src))), (VREV64q16 QPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   QPR:$src))), (VREV64q16 QPR:$src)>;
    }
    def : Pat<(Wide  (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (Wide  QPR:$src))), (VREV64q8 QPR:$src)>;
  }
  foreach Wide = [v4f32, v4i32] in {
    foreach Narrow = [v8f16, v8bf16, v8i16] in {
      def : Pat<(Wide   (bitconvert (Narrow QPR:$src))), (VREV32q16 QPR:$src)>;
      def : Pat<(Narrow (bitconvert (Wide   QPR:$src))), (VREV32q16 QPR:$src)>;
    }
    def : Pat<(Wide  (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (Wide  QPR:$src))), (VREV32q8 QPR:$src)>;
  }
  foreach Wide = [v8f16, v8bf16, v8i16] in {
    def : Pat<(Wide  (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
    def : Pat<(v16i8 (bitconvert (Wide  QPR:$src))), (VREV16q8 QPR:$src)>;
  }
}

let Predicates = [HasNEON] in {
  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
  // rather than the more general 'ARMVectorRegCast' which would also
  // match some bitconverts. If we use the latter in cases where the
  // input and output types are the same, the bitconvert gets elided
  // and we end up generating a nonsense match of nothing.

  // Every ordered pair of 128-bit types (including identical ones).
  foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
    foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
      def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;
    }
  }

  // Every ordered pair of 64-bit types (including identical ones).
  foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in {
    foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in {
      def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
    }
  }
}

// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
let Predicates = [IsBE,HasNEON] in {
  // Byte-aligned accesses.
  def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
            (VREV64q8 (VLD1q8 addrmode6:$addr))>;
  def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
  // Halfword-aligned accesses.
  def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
            (VREV64q16 (VLD1q16 addrmode6:$addr))>;
  def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
            (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
}

// Fold extracting an element out of a v2i32 into a vfp register.
let Predicates = [HasNEON] in
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load.
// Example:
//   Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
//
// DestLanes and DestTy spell the result vector type and pick the VMOVL
// variant; SrcTy picks both the extending-load fragment ("...loadvi" # SrcTy)
// and the load instruction ("VLD1d" # SrcTy).
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  defvar DestVT    = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar VLD1Inst  = !cast<Instruction>("VLD1d" # SrcTy);
  defvar UExtInst  = !cast<Instruction>("VMOVLuv" # DestLanes # DestTy);
  defvar SExtInst  = !cast<Instruction>("VMOVLsv" # DestLanes # DestTy);

  let AddedComplexity = 10 in {
    // Any-extending loads use the same selection as zero-extending ones.
    def _Any : Pat<(DestVT (!cast<PatFrag>("extloadvi" # SrcTy)
                             addrmode6:$addr)),
                   (UExtInst (VLD1Inst addrmode6:$addr))>,
               Requires<[HasNEON]>;

    def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadvi" # SrcTy)
                           addrmode6:$addr)),
                 (UExtInst (VLD1Inst addrmode6:$addr))>,
             Requires<[HasNEON]>;

    def _S : Pat<(DestVT (!cast<PatFrag>("sextloadvi" # SrcTy)
                           addrmode6:$addr)),
                 (SExtInst (VLD1Inst addrmode6:$addr))>,
             Requires<[HasNEON]>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example:
//   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)),
//                                                 (i32 0))),
//                         dsub_0)>;
//
// DestLanes/DestTy spell the result type, SrcTy completes the load fragment
// name ("...loadv" # SrcTy), and InsnLanes/InsnTy pick the VMOVL variant.
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  defvar DestVT   = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UExtInst = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
  defvar SExtInst = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);

  // Any-extending loads use the same selection as zero-extending ones.
  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UExtInst (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;

  def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (UExtInst (VLD1LNd32 addrmode6oneL32:$addr,
                                      (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>,
           Requires<[HasNEON]>;

  def _S : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (SExtInst (VLD1LNd32 addrmode6oneL32:$addr,
                                      (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// RevLanes completes the name of the VREV32d instruction that is applied to
// the loaded register before it is lengthened.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string InsnLanes,
                                          string InsnTy, string RevLanes> {
  defvar DestVT   = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UExtInst = !cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy);
  defvar SExtInst = !cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy);
  defvar VREVInst = !cast<Instruction>("VREV32d" # RevLanes);

  // Any-extending loads use the same selection as zero-extending ones.
  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (UExtInst
                     (VREVInst (VLD1LNd32 addrmode6oneL32:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;

  def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (UExtInst
                   (VREVInst (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>,
           Requires<[HasNEON]>;

  def _S : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (SExtInst
                   (VREVInst (VLD1LNd32 addrmode6oneL32:$addr,
                                        (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0))>;
//
// Insn1Lanes/Insn1Ty pick the first (inner) VMOVL, Insn2Lanes/Insn2Ty the
// second (outer) one.
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  defvar DestVT = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UExt1  = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar UExt2  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SExt1  = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar SExt2  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

  // Any-extending loads use the same selection as zero-extending ones.
  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UExt2
                   (EXTRACT_SUBREG
                     (UExt1 (VLD1LNd32 addrmode6oneL32:$addr,
                                       (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;

  def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (UExt2
                 (EXTRACT_SUBREG
                   (UExt1 (VLD1LNd32 addrmode6oneL32:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>,
           Requires<[HasNEON]>;

  def _S : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (SExt2
                 (EXTRACT_SUBREG
                   (SExt1 (VLD1LNd32 addrmode6oneL32:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// RevLanes completes the name of the VREV32d instruction that is applied to
// the loaded register before the first lengthening step.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                      string SrcTy, string Insn1Lanes,
                                      string Insn1Ty, string Insn2Lanes,
                                      string Insn2Ty, string RevLanes> {
  defvar DestVT   = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UExt1    = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar UExt2    = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SExt1    = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar SExt2    = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);
  defvar VREVInst = !cast<Instruction>("VREV32d" # RevLanes);

  // Any-extending loads use the same selection as zero-extending ones.
  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6oneL32:$addr)),
                 (UExt2
                   (EXTRACT_SUBREG
                     (UExt1
                       (VREVInst (VLD1LNd32 addrmode6oneL32:$addr,
                                            (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;

  def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (UExt2
                 (EXTRACT_SUBREG
                   (UExt1
                     (VREVInst (VLD1LNd32 addrmode6oneL32:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>,
           Requires<[HasNEON]>;

  def _S : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                         addrmode6oneL32:$addr)),
               (SExt2
                 (EXTRACT_SUBREG
                   (SExt1
                     (VREVInst (VLD1LNd32 addrmode6oneL32:$addr,
                                          (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//          dsub_0)>;
//
// Insn1Lanes/Insn1Ty pick the first (inner) VMOVL, Insn2Lanes/Insn2Ty the
// second (outer) one.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                               string Insn1Lanes, string Insn1Ty,
                               string Insn2Lanes, string Insn2Ty> {
  defvar DestVT = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UExt1  = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar UExt2  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SExt1  = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar SExt2  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

  // Any-extending loads use the same selection as zero-extending ones.
  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UExt2
                     (EXTRACT_SUBREG
                       (UExt1 (VLD1LNd16 addrmode6:$addr,
                                         (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;

  def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                         addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (UExt2
                   (EXTRACT_SUBREG
                     (UExt1 (VLD1LNd16 addrmode6:$addr,
                                       (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;

  def _S : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                         addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (SExt2
                   (EXTRACT_SUBREG
                     (SExt1 (VLD1LNd16 addrmode6:$addr,
                                       (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
// VREV16d8 is applied to the register loaded by VLD1LNd16 before the first
// lengthening step.  Insn1Lanes/Insn1Ty pick the first (inner) VMOVL,
// Insn2Lanes/Insn2Ty the second (outer) one.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string Insn1Lanes,
                                          string Insn1Ty, string Insn2Lanes,
                                          string Insn2Ty> {
  defvar DestVT = !cast<ValueType>("v" # DestLanes # DestTy);
  defvar UExt1  = !cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty);
  defvar UExt2  = !cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty);
  defvar SExt1  = !cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty);
  defvar SExt2  = !cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty);

  // VREV16d8 is a fixed instruction here, so it is named directly instead of
  // being looked up through !cast<Instruction>("VREV16d8").

  // Any-extending loads use the same selection as zero-extending ones.
  def _Any : Pat<(DestVT (!cast<PatFrag>("extloadv" # SrcTy)
                           addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (UExt2
                     (EXTRACT_SUBREG
                       (UExt1
                         (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                              (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;

  def _Z : Pat<(DestVT (!cast<PatFrag>("zextloadv" # SrcTy)
                         addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (UExt2
                   (EXTRACT_SUBREG
                     (UExt1
                       (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                            (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;

  def _S : Pat<(DestVT (!cast<PatFrag>("sextloadv" # SrcTy)
                         addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (SExt2
                   (EXTRACT_SUBREG
                     (SExt1
                       (VREV16d8 (VLD1LNd16 addrmode6:$addr,
                                            (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// Single lengthening of a full D register (not endian specific).
defm : Lengthen_Single<"8", "i16", "8">;  // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [HasNEON,IsLE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [HasNEON,IsBE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32",
                                    "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4",
                                        "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64",
                                    "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  // Any-extending and zero-extending loads share one selection.
  foreach ExtLoad = [extloadvi8, zextloadvi8] in
    def : Pat<(v2i64 (ExtLoad addrmode6:$addr)),
              (VMOVLuv2i64
                (EXTRACT_SUBREG
                  (VMOVLuv4i32
                    (EXTRACT_SUBREG
                      (VMOVLuv8i16
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0))),
                      dsub_0)),
                  dsub_0))>;

  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
  // VREV16d8 is named directly; a !cast<Instruction>("VREV16d8") lookup on a
  // constant string adds nothing.
  // Any-extending and zero-extending loads share one selection.
  foreach ExtLoad = [extloadvi8, zextloadvi8] in
    def : Pat<(v2i64 (ExtLoad addrmode6:$addr)),
              (VMOVLuv2i64
                (EXTRACT_SUBREG
                  (VMOVLuv4i32
                    (EXTRACT_SUBREG
                      (VMOVLuv8i16
                        (VREV16d8
                          (VLD1LNd16 addrmode6:$addr,
                                     (f64 (IMPLICIT_DEF)), (i32 0)))),
                      dsub_0)),
                  dsub_0))>;

  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
}

// concat_vectors of two D registers: $Dn becomes dsub_0 and $Dm becomes
// dsub_1 of the resulting Q register, for every 128-bit type listed.
let Predicates = [HasNEON] in {
  foreach VT = [v2i64, v4i32, v8i16, v16i8, v4f32, v8f16, v8bf16] in
    def : Pat<(VT (concat_vectors DPR:$Dn, DPR:$Dm)),
              (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// fmdhr / fmdlr are accepted as VSETLNi32 with lane 1 / lane 0.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ...
immediates 8142def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", 8143 (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; 8144def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", 8145 (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; 8146def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", 8147 (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; 8148def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", 8149 (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; 8150 8151 8152// VLD1 single-lane pseudo-instructions. These need special handling for 8153// the lane index that an InstAlias can't handle, so we use these instead. 8154def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", 8155 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8156 pred:$p)>; 8157def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", 8158 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8159 pred:$p)>; 8160def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", 8161 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8162 pred:$p)>; 8163 8164def VLD1LNdWB_fixed_Asm_8 : 8165 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", 8166 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8167 pred:$p)>; 8168def VLD1LNdWB_fixed_Asm_16 : 8169 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", 8170 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8171 pred:$p)>; 8172def VLD1LNdWB_fixed_Asm_32 : 8173 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", 8174 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8175 pred:$p)>; 8176def VLD1LNdWB_register_Asm_8 : 8177 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", 8178 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8179 rGPR:$Rm, pred:$p)>; 8180def VLD1LNdWB_register_Asm_16 : 8181 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", 8182 (ins VecListOneDHWordIndexed:$list, 
addrmode6align16:$addr, 8183 rGPR:$Rm, pred:$p)>; 8184def VLD1LNdWB_register_Asm_32 : 8185 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", 8186 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8187 rGPR:$Rm, pred:$p)>; 8188 8189 8190// VST1 single-lane pseudo-instructions. These need special handling for 8191// the lane index that an InstAlias can't handle, so we use these instead. 8192def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", 8193 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8194 pred:$p)>; 8195def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", 8196 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8197 pred:$p)>; 8198def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", 8199 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8200 pred:$p)>; 8201 8202def VST1LNdWB_fixed_Asm_8 : 8203 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", 8204 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8205 pred:$p)>; 8206def VST1LNdWB_fixed_Asm_16 : 8207 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", 8208 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8209 pred:$p)>; 8210def VST1LNdWB_fixed_Asm_32 : 8211 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", 8212 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 8213 pred:$p)>; 8214def VST1LNdWB_register_Asm_8 : 8215 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", 8216 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 8217 rGPR:$Rm, pred:$p)>; 8218def VST1LNdWB_register_Asm_16 : 8219 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", 8220 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 8221 rGPR:$Rm, pred:$p)>; 8222def VST1LNdWB_register_Asm_32 : 8223 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", 8224 (ins VecListOneDWordIndexed:$list, 
addrmode6align32:$addr, 8225 rGPR:$Rm, pred:$p)>; 8226 8227// VLD2 single-lane pseudo-instructions. These need special handling for 8228// the lane index that an InstAlias can't handle, so we use these instead. 8229def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", 8230 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8231 pred:$p)>; 8232def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 8233 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8234 pred:$p)>; 8235def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 8236 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; 8237def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 8238 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8239 pred:$p)>; 8240def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 8241 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8242 pred:$p)>; 8243 8244def VLD2LNdWB_fixed_Asm_8 : 8245 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", 8246 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8247 pred:$p)>; 8248def VLD2LNdWB_fixed_Asm_16 : 8249 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 8250 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8251 pred:$p)>; 8252def VLD2LNdWB_fixed_Asm_32 : 8253 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 8254 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8255 pred:$p)>; 8256def VLD2LNqWB_fixed_Asm_16 : 8257 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 8258 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8259 pred:$p)>; 8260def VLD2LNqWB_fixed_Asm_32 : 8261 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 8262 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8263 pred:$p)>; 8264def VLD2LNdWB_register_Asm_8 : 8265 
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", 8266 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8267 rGPR:$Rm, pred:$p)>; 8268def VLD2LNdWB_register_Asm_16 : 8269 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 8270 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8271 rGPR:$Rm, pred:$p)>; 8272def VLD2LNdWB_register_Asm_32 : 8273 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 8274 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8275 rGPR:$Rm, pred:$p)>; 8276def VLD2LNqWB_register_Asm_16 : 8277 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 8278 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8279 rGPR:$Rm, pred:$p)>; 8280def VLD2LNqWB_register_Asm_32 : 8281 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 8282 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8283 rGPR:$Rm, pred:$p)>; 8284 8285 8286// VST2 single-lane pseudo-instructions. These need special handling for 8287// the lane index that an InstAlias can't handle, so we use these instead. 
// Plain addressing form (D-register lists, then Q-register lists).
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".8", "$list, $addr",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;

// "$addr!" writeback form.
def VST2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".8", "$list, $addr!",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VST2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".8", "$list, $addr, $Rm",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): the "lane index" wording appears to be copied from the
// single-lane sections; the list operands here are the *AllLanes classes,
// which carry no lane index - confirm and reword.
// Plain addressing form (D-register lists, then Q-register lists).
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;

// "$addr!" writeback form.
def VLD3DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VLD3DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain addressing form (D-register lists, then Q-register lists).
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;

// "$addr!" writeback form.
def VLD3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VLD3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain addressing form (D-register lists, then Q-register lists).
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// "$addr!" writeback form.
def VLD3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VLD3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain addressing form (D-register lists, then Q-register lists).
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;

// "$addr!" writeback form.
def VST3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VST3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain addressing form (D-register lists, then Q-register lists).
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// "$addr!" writeback form.
def VST3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VST3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): the "lane index" wording appears to be copied from the
// single-lane sections; the list operands of the definitions that follow are
// the *AllLanes classes, which carry no lane index - confirm and reword.
// Plain addressing form (D-register lists, then Q-register lists).  The
// alignment operand class depends on the element size: dupalign32 for .8,
// dupalign64 for .16 and dupalign64or128 for .32.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign64or128:$addr, pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign64or128:$addr, pred:$p)>;

// "$addr!" writeback form.
def VLD4DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign64or128:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign64or128:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VLD4DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list,
         addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list,
         addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain addressing form (D-register lists, then Q-register lists).  The
// alignment operand class depends on the element size: align32 for .8,
// align64 for .16 and align64or128 for .32.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourDByteIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourDHWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourDWordIndexed:$list,
         addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQHWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQWordIndexed:$list,
         addrmode6align64or128:$addr, pred:$p)>;

// "$addr!" writeback form.
def VLD4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourDByteIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourDHWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourDWordIndexed:$list,
         addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQHWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQWordIndexed:$list,
         addrmode6align64or128:$addr, pred:$p)>;
// "$addr, $Rm" register writeback form.
def VLD4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourDByteIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourDHWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourDWordIndexed:$list,
         addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQHWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQWordIndexed:$list,
         addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every form below takes the same addressing operand
// (addrmode6align64or128or256) and differs only in the element-size suffix and
// the D/Q register list, so each group is generated by iterating over the
// element sizes. The loops are kept in the original definition order.

// Forms without writeback: VLD4dAsm_{8,16,32} and VLD4qAsm_{8,16,32}.
foreach sz = ["8", "16", "32"] in
def VLD4dAsm_#sz :
  NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr",
                            (ins VecListFourD:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD4qAsm_#sz :
  NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr",
                            (ins VecListFourQ:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;

// "$addr!" forms: VLD4dWB_fixed_Asm_{8,16,32} and VLD4qWB_fixed_Asm_{8,16,32}.
foreach sz = ["8", "16", "32"] in
def VLD4dWB_fixed_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr!",
                            (ins VecListFourD:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD4qWB_fixed_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr!",
                            (ins VecListFourQ:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;

// "$addr, $Rm" forms: VLD4dWB_register_Asm_{8,16,32} and
// VLD4qWB_register_Asm_{8,16,32}.
foreach sz = ["8", "16", "32"] in
def VLD4dWB_register_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr, $Rm",
                            (ins VecListFourD:$list,
                                 addrmode6align64or128or256:$addr,
                                 rGPR:$Rm, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD4qWB_register_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr, $Rm",
                            (ins VecListFourQ:$list,
                                 addrmode6align64or128or256:$addr,
                                 rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Forms without writeback: "vst4.<size> $list, $addr".
def VST4LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                            (ins VecListFourDByteIndexed:$list,
                                 addrmode6align32:$addr, pred:$p)>;
def VST4LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                            (ins VecListFourDHWordIndexed:$list,
                                 addrmode6align64:$addr, pred:$p)>;
def VST4LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                            (ins VecListFourDWordIndexed:$list,
                                 addrmode6align64or128:$addr, pred:$p)>;
def VST4LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                            (ins VecListFourQHWordIndexed:$list,
                                 addrmode6align64:$addr, pred:$p)>;
def VST4LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                            (ins VecListFourQWordIndexed:$list,
                                 addrmode6align64or128:$addr, pred:$p)>;

// "WB_fixed" forms: same operands, written with the "$addr!" syntax.
def VST4LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                            (ins VecListFourDByteIndexed:$list,
                                 addrmode6align32:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                            (ins VecListFourDHWordIndexed:$list,
                                 addrmode6align64:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                            (ins VecListFourDWordIndexed:$list,
                                 addrmode6align64or128:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                            (ins VecListFourQHWordIndexed:$list,
                                 addrmode6align64:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                            (ins VecListFourQWordIndexed:$list,
                                 addrmode6align64or128:$addr, pred:$p)>;

// "WB_register" forms: take an extra rGPR operand, "$list, $addr, $Rm".
def VST4LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                            (ins VecListFourDByteIndexed:$list,
                                 addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                            (ins VecListFourDHWordIndexed:$list,
                                 addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                            (ins VecListFourDWordIndexed:$list,
                                 addrmode6align64or128:$addr, rGPR:$Rm,
                                 pred:$p)>;
def VST4LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                            (ins VecListFourQHWordIndexed:$list,
                                 addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                            (ins VecListFourQWordIndexed:$list,
                                 addrmode6align64or128:$addr, rGPR:$Rm,
                                 pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every form below takes the same addressing operand
// (addrmode6align64or128or256) and differs only in the element-size suffix and
// the D/Q register list, so each group is generated by iterating over the
// element sizes. The loops are kept in the original definition order.

// Forms without writeback: VST4dAsm_{8,16,32} and VST4qAsm_{8,16,32}.
foreach sz = ["8", "16", "32"] in
def VST4dAsm_#sz :
  NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr",
                            (ins VecListFourD:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST4qAsm_#sz :
  NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr",
                            (ins VecListFourQ:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;

// "$addr!" forms: VST4dWB_fixed_Asm_{8,16,32} and VST4qWB_fixed_Asm_{8,16,32}.
foreach sz = ["8", "16", "32"] in
def VST4dWB_fixed_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr!",
                            (ins VecListFourD:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST4qWB_fixed_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr!",
                            (ins VecListFourQ:$list,
                                 addrmode6align64or128or256:$addr, pred:$p)>;

// "$addr, $Rm" forms: VST4dWB_register_Asm_{8,16,32} and
// VST4qWB_register_Asm_{8,16,32}.
foreach sz = ["8", "16", "32"] in
def VST4dWB_register_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr, $Rm",
                            (ins VecListFourD:$list,
                                 addrmode6align64or128or256:$addr,
                                 rGPR:$Rm, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST4qWB_register_Asm_#sz :
  NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr, $Rm",
                            (ins VecListFourQ:$list,
                                 addrmode6align64or128or256:$addr,
                                 rGPR:$Rm, pred:$p)>;

// VMOV and VMVN (register) accept an optional datatype suffix. The register
// form of VMOV is expressed as VORR with both source operands set to $Vm.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// Each "vcle Dd, Dn, Dm" alias below produces the matching VCGE instruction
// with $Dn and $Dm exchanged.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 alias is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                      (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 alias is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                      (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// Each "vclt Dd, Dn, Dm" alias below produces the matching VCGT instruction
// with $Dn and $Dm exchanged.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 alias is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                      (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 alias is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                      (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// D-register forms first, then Q-register forms.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov.i32 Vd, #-imm" can be handled via "vmvn", and vice versa: when the
// immediate matches nImmVMOVI32Neg, vmov.i32 is assembled as VMVN and
// vmvn.i32 is assembled as VMOV.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
// Each entry <m> yields the alias "<m>q" -> "<m>": vbicq, vandq, veorq, vorrq,
// vmovq, vmvnq.
foreach m = ["vbic", "vand", "veor", "vorr", "vmov", "vmvn"] in
def : NEONMnemonicAlias<m # "q", m>;

// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
foreach ty = ["f32", "f64"] in
def : NEONMnemonicAlias<"vmovq." # ty, "vmov." # ty>;

// vaddq, vsubq, vminq, vmaxq, vmulq, vabsq, vshlq, vshrq, vcvtq, vcleq, vceqq,
// vzipq, vswpq.
foreach m = ["vadd", "vsub", "vmin", "vmax", "vmul", "vabs", "vshl", "vshr",
             "vcvt", "vcle", "vceq", "vzip", "vswp"] in
def : NEONMnemonicAlias<m # "q", m>;

// vrecpeq is only aliased together with an explicit .f32 / .u32 suffix.
foreach ty = ["f32", "u32"] in
def : NEONMnemonicAlias<"vrecpeq." # ty, "vrecpe." # ty>;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
let Predicates = [HasBF16, HasNEON] in {

// Shared base of the vdot.bf16 encodings. The asm string is left empty here
// and filled in by the users; everything derived from this class is decoded
// in the "VFPV8" namespace.
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
  : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
          N3RegFrm, IIC_VDOTPROD, "", "", pattern> {
  let DecoderNamespace = "VFPV8";
}

// Vector-by-vector form, selected from int_arm_neon_bfdot. The accumulator
// input $Vd is tied to the result $dst. DecoderNamespace is inherited from
// BF16VDOT.
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc,
                ValueType AccumTy, ValueType InputTy>
  : BF16VDOT<0b11000, 0b00, Q,
             (outs RegTy:$dst),
             (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
             [(set (AccumTy RegTy:$dst),
                   (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                       (InputTy RegTy:$Vn),
                                       (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm";
}

// Vector-by-lane form. The instruction itself carries no pattern; the
// accompanying Pat matches int_arm_neon_bfdot whose last operand is a
// bitconvert of an ARMvduplane, and passes RHS as the $Vm operand.
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc,
                     ValueType AccumTy, ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q,
                    (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                         DPR_VFP2:$Vm, VectorIndex32:$lane),
                    []> {
    // The lane index is encoded in instruction bit 5.
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm$lane";
  }

  def : Pat<
    (AccumTy
      (int_arm_neon_bfdot
        (AccumTy RegTy:$Vd),
        (InputTy RegTy:$Vn),
        (InputTy (bitconvert
                   (AccumTy (ARMvduplane (AccumTy RegTy:$Vm),
                                         VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                              VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16,
                                 (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16,
                                 (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// BF16 matrix multiply-accumulate, selected from int_arm_neon_bfmmla. The
// selection pattern is written in terms of QPR / v4f32 / v8bf16 regardless of
// RegTy; the only instantiation below uses QPR.
class BF16MM<bit Q, RegisterClass RegTy, string opc>
  : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
          (outs RegTy:$dst),
          (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
          N3RegFrm, IIC_VDOTPROD, "", "",
          [(set (v4f32 QPR:$dst),
                (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                     (v8bf16 QPR:$Vn),
                                     (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = opc # ".bf16" # "\t$Vd, $Vn, $Vm";
  let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;

// vfmat.bf16 / vfmab.bf16, vector-by-vector form. OpNode supplies the
// operator used in the selection pattern; $Vd is tied to $dst.
class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst),
           (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
           [(set (v4f32 QPR:$dst),
                 (OpNode (v4f32 QPR:$Vd),
                         (v8bf16 QPR:$Vn),
                         (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

// vfmat.bf16 / vfmab.bf16, vector-by-lane form. As with BF16VDOTI, the
// instruction has no pattern of its own and is selected through the Pat.
multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1,
                      (outs QPR:$dst),
                      (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                      IIC_VMACD, "vfma" # suffix, "bf16",
                      "$Vd, $Vn, $Vm$idx", "", []> {
    // The two-bit index is split across instruction bits 5 (high) and 3 (low).
    bits<2> idx;
    let Inst{5} = idx{1};
    let Inst{3} = idx{0};
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  // The Q-register lane is rewritten as a D sub-register (DSubReg_i16_reg)
  // plus an index operand (SubReg_i16_lane).
  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v8bf16 QPR:$Vn),
                   (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                                        VectorIndex16:$lane)))),
    (!cast<Instruction>(NAME)
        QPR:$Vd,
        QPR:$Vn,
        (EXTRACT_SUBREG QPR:$Vm, (DSubReg_i16_reg VectorIndex16:$lane)),
        (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

// vcvt.bf16.f32 with a Q-register source and a D-register destination; no
// selection pattern is attached here.
def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                    (outs DPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
}
// End of BFloat16 instructions