//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand; printed with the VMOV
// modified-immediate printer.
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

// Splat immediates for 8/16/32-bit elements. Each operand is paired with an
// AsmOperandClass so the assembly parser can classify and range-check it.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// "Not" variants: match the bitwise complement of a splat immediate.
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Parameterized asm-operand classes for VMOV/VMVN immediates that are a
// replication of a From-sized value into a To-sized vector. The parser
// predicate and render method names are composed from the element sizes.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.F32; printed as an FP immediate.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane-index operands. The ImmLeaf bounds follow from the element
// count of a 64-bit D register: 8 bytes, 4 half-words, 2 words, 1 dword.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                       "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                        "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Register list of one D register, with byte lane subscripting.
// The indexed variants carry the register and the lane index as an
// (ops DPR:$Vd, i32imm:$idx) pair in the MI operand list.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// PatFrags that select loads/stores by their alignment (in bytes).
// Note the asymmetry: the dword variants accept any alignment >= 8, while
// the word/hword/byte variants require an exact match.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVTST    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVTST>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
// FIX: SDTARMVSHIMM was referenced below (VRSHR/VQSHL nodes) but never
// defined in this file; define the base same-type shift profile here.
def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshrnImm    : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm   : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm   : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm   : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm   : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm   : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm  : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm  : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm  : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

// Bitwise select: one result, three same-typed vector operands.
def NEONvbsp : SDNode<"ARMISD::VBSP",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Two-result shuffles (zip/unzip/transpose) over same-typed vectors.
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Table lookups operate on v8i8 values only.
def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Q-register (128-bit) load pseudos: plain, generic writeback, and the
// fixed-/register-offset writeback variants.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// QQ-register (256-bit) load pseudos, same variants as above.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// QQQQ-register (512-bit) load pseudos; $src is tied to $dst so the
// not-loaded half of the register tuple is preserved.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;               // Rm = 0b1111 encodes "no writeback".
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def  VLD1d8   : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def  VLD1d16  : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def  VLD1d32  : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def  VLD1d64  : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def  VLD1q8   : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def  VLD1q16  : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def  VLD1q32  : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def  VLD1q64  : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  // NOTE(review): itinerary here is IIC_VLD1x2u, not a x3 variant.
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
725class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> 726 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 727 (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, 728 "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { 729 let Rm = 0b1111; 730 let Inst{5-4} = Rn{5-4}; 731 let DecoderMethod = "DecodeVLDST1Instruction"; 732} 733multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 734 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 735 (ins AddrMode:$Rn), IIC_VLD1x2u, 736 "vld1", Dt, "$Vd, $Rn!", 737 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 738 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 739 let Inst{5-4} = Rn{5-4}; 740 let DecoderMethod = "DecodeVLDST1Instruction"; 741 } 742 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), 743 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 744 "vld1", Dt, "$Vd, $Rn, $Rm", 745 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 746 let Inst{5-4} = Rn{5-4}; 747 let DecoderMethod = "DecodeVLDST1Instruction"; 748 } 749} 750 751def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 752def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 753def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 754def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 755 756defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 757defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 758defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 759defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 760 761def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 762def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 763def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 764def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 765def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers
def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...with double-spaced registers:
def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
    Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers:
def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
                                                   Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v4f16 DPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8f16 QPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4bf16 DPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8bf16 QPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
}


let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []>, Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

let Predicates = [HasNEON] in {
def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
}

class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

let Predicates = [HasNEON] in {
def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
}

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
                              addrmode6dupalign16>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                              addrmode6dupalign32>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                              addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} =
Rn{4}; 1578 let DecoderMethod = "DecodeVLD4DupInstruction"; 1579} 1580 1581def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; 1582def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; 1583def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1584 1585def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1586def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1587def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1588 1589// ...with double-spaced registers (not used for codegen): 1590def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; 1591def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; 1592def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1593 1594def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1595def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1596def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1597def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1598def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1599def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1600 1601// ...with address register writeback: 1602class VLD4DUPWB<bits<4> op7_4, string Dt> 1603 : NLdSt<1, 0b10, 0b1111, op7_4, 1604 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1605 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, 1606 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", 1607 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 1608 let Inst{4} = Rn{4}; 1609 let DecoderMethod = "DecodeVLD4DupInstruction"; 1610} 1611 1612def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; 1613def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; 1614def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1615 1616def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; 1617def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; 1618def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1619 1620def 
VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1621def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1622def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1623 1624} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1625 1626let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 1627 1628// Classes for VST* pseudo-instructions with multi-register operands. 1629// These are expanded to real instructions after register allocation. 1630class VSTQPseudo<InstrItinClass itin> 1631 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1632class VSTQWBPseudo<InstrItinClass itin> 1633 : PseudoNLdSt<(outs GPR:$wb), 1634 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1635 "$addr.addr = $wb">; 1636class VSTQWBfixedPseudo<InstrItinClass itin> 1637 : PseudoNLdSt<(outs GPR:$wb), 1638 (ins addrmode6:$addr, QPR:$src), itin, 1639 "$addr.addr = $wb">; 1640class VSTQWBregisterPseudo<InstrItinClass itin> 1641 : PseudoNLdSt<(outs GPR:$wb), 1642 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1643 "$addr.addr = $wb">; 1644class VSTQQPseudo<InstrItinClass itin> 1645 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1646class VSTQQWBPseudo<InstrItinClass itin> 1647 : PseudoNLdSt<(outs GPR:$wb), 1648 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1649 "$addr.addr = $wb">; 1650class VSTQQWBfixedPseudo<InstrItinClass itin> 1651 : PseudoNLdSt<(outs GPR:$wb), 1652 (ins addrmode6:$addr, QQPR:$src), itin, 1653 "$addr.addr = $wb">; 1654class VSTQQWBregisterPseudo<InstrItinClass itin> 1655 : PseudoNLdSt<(outs GPR:$wb), 1656 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1657 "$addr.addr = $wb">; 1658 1659class VSTQQQQPseudo<InstrItinClass itin> 1660 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1661class VSTQQQQWBPseudo<InstrItinClass itin> 1662 : PseudoNLdSt<(outs GPR:$wb), 1663 (ins addrmode6:$addr, 
am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d8TPseudo  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo     : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d8QPseudo  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo     : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>,
Sched<[WriteVST4]>;
def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;

// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                         addrmode6align64or128>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                         addrmode6align64or128>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                         addrmode6align64or128>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                               Sched<[WriteVST4]>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                               Sched<[WriteVST4]>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                               Sched<[WriteVST4]>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                         addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...with double-spaced registers:
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...with double-spaced registers:
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2057def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2058def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2059 2060def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2061def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2062def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2063 2064} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2065 2066// Classes for VST*LN pseudo-instructions with multi-register operands. 2067// These are expanded to real instructions after register allocation. 2068class VSTQLNPseudo<InstrItinClass itin> 2069 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 2070 itin, "">; 2071class VSTQLNWBPseudo<InstrItinClass itin> 2072 : PseudoNLdSt<(outs GPR:$wb), 2073 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 2074 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2075class VSTQQLNPseudo<InstrItinClass itin> 2076 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 2077 itin, "">; 2078class VSTQQLNWBPseudo<InstrItinClass itin> 2079 : PseudoNLdSt<(outs GPR:$wb), 2080 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 2081 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2082class VSTQQQQLNPseudo<InstrItinClass itin> 2083 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 2084 itin, "">; 2085class VSTQQQQLNWBPseudo<InstrItinClass itin> 2086 : PseudoNLdSt<(outs GPR:$wb), 2087 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 2088 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2089 2090// VST1LN : Vector Store (single element from one lane) 2091class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2092 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> 2093 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2094 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), 2095 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", 2096 [(StoreOp (ExtractOp (Ty 
DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, 2097 Sched<[WriteVST1]> { 2098 let Rm = 0b1111; 2099 let DecoderMethod = "DecodeVST1LN"; 2100} 2101class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 2102 : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> { 2103 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2104 addrmode6:$addr)]; 2105} 2106 2107def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, 2108 ARMvgetlaneu, addrmode6> { 2109 let Inst{7-5} = lane{2-0}; 2110} 2111def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, 2112 ARMvgetlaneu, addrmode6> { 2113 let Inst{7-6} = lane{1-0}; 2114 let Inst{4} = Rn{4}; 2115} 2116 2117def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, 2118 addrmode6oneL32> { 2119 let Inst{7} = lane{0}; 2120 let Inst{5-4} = Rn{5-4}; 2121} 2122 2123def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>; 2124def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>; 2125def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>; 2126 2127let Predicates = [HasNEON] in { 2128def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), 2129 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 2130def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), 2131 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 2132 2133def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr), 2134 (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>; 2135def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr), 2136 (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 2137} 2138 2139// ...with address register writeback: 2140class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2141 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> 2142 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2143 (ins AdrMode:$Rn, am6offset:$Rm, 2144 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, 
"vst1", Dt, 2145 "\\{$Vd[$lane]\\}, $Rn$Rm", 2146 "$Rn.addr = $wb", 2147 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), 2148 AdrMode:$Rn, am6offset:$Rm))]>, 2149 Sched<[WriteVST1]> { 2150 let DecoderMethod = "DecodeVST1LN"; 2151} 2152class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 2153 : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> { 2154 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2155 addrmode6:$addr, am6offset:$offset))]; 2156} 2157 2158def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, 2159 ARMvgetlaneu, addrmode6> { 2160 let Inst{7-5} = lane{2-0}; 2161} 2162def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, 2163 ARMvgetlaneu, addrmode6> { 2164 let Inst{7-6} = lane{1-0}; 2165 let Inst{4} = Rn{4}; 2166} 2167def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, 2168 extractelt, addrmode6oneL32> { 2169 let Inst{7} = lane{0}; 2170 let Inst{5-4} = Rn{5-4}; 2171} 2172 2173def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>; 2174def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>; 2175def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>; 2176 2177let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 2178 2179// VST2LN : Vector Store (single 2-element structure from one lane) 2180class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2181 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2182 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), 2183 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", 2184 "", []>, Sched<[WriteVST1]> { 2185 let Rm = 0b1111; 2186 let Inst{4} = Rn{4}; 2187 let DecoderMethod = "DecodeVST2LN"; 2188} 2189 2190def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { 2191 let Inst{7-5} = lane{2-0}; 2192} 2193def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { 2194 let Inst{7-6} = lane{1-0}; 2195} 2196def VST2LNd32 : 
VST2LN<0b1001, {?,0,0,?}, "32"> { 2197 let Inst{7} = lane{0}; 2198} 2199 2200def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2201def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2202def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2203 2204// ...with double-spaced registers: 2205def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { 2206 let Inst{7-6} = lane{1-0}; 2207 let Inst{4} = Rn{4}; 2208} 2209def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { 2210 let Inst{7} = lane{0}; 2211 let Inst{4} = Rn{4}; 2212} 2213 2214def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2215def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2216 2217// ...with address register writeback: 2218class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2219 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2220 (ins addrmode6:$Rn, am6offset:$Rm, 2221 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, 2222 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", 2223 "$Rn.addr = $wb", []> { 2224 let Inst{4} = Rn{4}; 2225 let DecoderMethod = "DecodeVST2LN"; 2226} 2227 2228def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { 2229 let Inst{7-5} = lane{2-0}; 2230} 2231def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { 2232 let Inst{7-6} = lane{1-0}; 2233} 2234def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { 2235 let Inst{7} = lane{0}; 2236} 2237 2238def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2239def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2240def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2241 2242def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { 2243 let Inst{7-6} = lane{1-0}; 2244} 2245def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { 2246 let Inst{7} = lane{0}; 2247} 2248 2249def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2250def VST2LNq32Pseudo_UPD : 
VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111;          // Rm == 0b1111 selects the no-writeback form.
  let DecoderMethod = "DecodeVST3LN";
}

// Lane index uses a size-dependent slice of Inst{7-5}, as for VST1/VST2.
def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD :
VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;          // Rm == 0b1111 selects the no-writeback form.
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo :
VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
// (little-endian only: byte order within the element matters for BE).
let Predicates = [IsLE,HasNEON] in {
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>;
}
// Big-endian: any non-word-aligned access goes through the 64-bit element
// form, which is endian-neutral for a whole d-register.
let Predicates = [IsBE,HasNEON] in {
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>;
}

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
let Predicates = [HasNEON] in {
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
}
let Predicates = [IsLE,HasNEON] in {
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>;
}

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ but matches an intrinsic (SDPatternOperator)
// and takes an explicit itinerary instead of the fixed IIC_VUNA[DQ].
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  // The accumulator input must be allocated to the destination register.
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read-modify-write: each shuffle writes back into both
// of its operands, hence the two outs tied to the two ins.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Scalar (by-lane) form: $Vm is restricted to DPR_VFP2 so the lane index
// fits the encoding; the scalar is splatted with ARMvduplane.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane intrinsic forms; the scalar operand is splatted via ARMvduplane.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" (shift) variant: note the operand order is $Vm, $Vn — the shifted
// value comes first in both the asm string and the pattern.
class
N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  // The accumulator input must be allocated to the destination register.
  let Constraints = "$src = $Vd";
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
// Q-register shift variant; operand order is $Vm, $Vn as in N3VDIntSh.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let
      TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// $src1 is the accumulator and is tied to the destination register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                          imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                          imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                                   imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                                   imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// The intrinsic computes the inner value; OpNode folds it into $src1.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-sized accumulator $src1 combined with the widening product of two
// D-sized operands.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                          imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                          imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Scalar (lane) variant of N3VL, 32-bit elements.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Scalar (lane) variant of N3VL, 16-bit elements.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Scalar (lane) variant of the long 3-register intrinsic, 32-bit elements.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Scalar (lane) variant, 16-bit elements (scalar restricted to DPR_8).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Wide 3-register operations.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, PatFrag fc> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register comparisons.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       PatFrag fc, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, fc, Commutable>;
  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, v4i16, fc, Commutable>;
  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v2i32, v2i32, fc, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v16i8, v16i8, fc, Commutable>;
  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v8i16, fc, Commutable>;
  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, fc, Commutable>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Variant using the "shift" instruction classes (operands swapped in asm).
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// All three element sizes share the single itinerary 'itin'.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// element sizes of 8, 16 and 32 bits:
// Inherits the 16- and 32-bit instantiations from N3VInt3_HS and adds the
// 8-bit D- and Q-register ones.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// The Inst{21-19} / Inst{21-20} / Inst{21} overrides set the leading bits of
// the imm6 field that encode the element size.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right shifts use the shr_imm* operand classes instead of a plain i32imm.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                             // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7,
                     OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15,
                     OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31,
                     OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr,
                      string Dt, SDPatternOperator OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
// VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                     v4f16, v4f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                     v8f16, v8f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
// VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zanyext, 1>;
// VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
// VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", saddsat, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", uaddsat, 1>;
// VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Match add-then-truncate-high-half to VADDHN.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Multiply Operations.

// VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                     v4f16, v4f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                     v8f16, v8f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;
def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                         v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;

// Match multiply-by-duplicated-lane on a Q register to the by-lane (sl)
// forms, extracting the D subregister that holds the lane.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq (v8f16 QPR:$src1),
                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i16_reg imm:$lane))),
                           (SubReg_i16_lane imm:$lane)))>;

// Multiply by a duplicated scalar register: insert the scalar into lane 0 of
// an undefined vector and use the by-lane form with lane index 0.
def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhd DPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhq QPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
}

// VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                           imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                           imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
}

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
}

// VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "s", ARMvmulls, 1>;
  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "u", ARMvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                  Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;

// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// FP variants are only selected when the target profits from VMLx
// (UseFPVMLx predicate below).
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                          v4f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                          v8f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// By-scalar (lane) variants.
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                              v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                              v8f16, v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Q-register mla-by-lane: extract the D half holding the lane, then use the
// D-by-lane instruction.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                 (fmul_su (v4f32 QPR:$src2),
                          (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", ARMvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", ARMvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;

let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations.
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
  //            (Vd = sat(Vd + rdmulh(Vn, Vm))); non-widening, the patterns
  //            below match same-size operands.  Selection is done from the
  //            saddsat/vqrdmulh combination, so null_frag is passed here and
  //            the patterns are written out explicitly.
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  def : Pat<(v4i16 (saddsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (saddsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (saddsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                  (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (saddsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                  (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  // By-scalar (lane) variants of VQRDMLAH.
  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  def : Pat<(v4i16 (saddsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh
                             (v4i16 DPR:$Vn),
                             (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                  imm:$lane)))))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (saddsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh
                             (v2i32 DPR:$Vn),
                             (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                  imm:$lane)))))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (saddsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh
                             (v8i16 QPR:$src2),
                             (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                  imm:$lane)))))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (saddsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh
                             (v4i32 QPR:$src2),
                             (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                  imm:$lane)))))),
            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;

  // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
  //            (Vd = sat(Vd - rdmulh(Vn, Vm))); non-widening, matched from
  //            ssubsat/vqrdmulh exactly like VQRDMLAH above.
  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                             null_frag>;
  def : Pat<(v4i16 (ssubsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (ssubsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (ssubsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                  (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (ssubsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                  (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  // By-scalar (lane) variants of VQRDMLSH.
  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                                  null_frag>;
  def : Pat<(v4i16 (ssubsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh
                             (v4i16 DPR:$Vn),
                             (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                  imm:$lane)))))),
            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  def : Pat<(v2i32 (ssubsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh
                             (v2i32 DPR:$Vn),
                             (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                  imm:$lane)))))),
            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (ssubsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh
                             (v8i16 QPR:$src2),
                             (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                  imm:$lane)))))),
            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (ssubsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh
                             (v4i32 QPR:$src2),
                             (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                  imm:$lane)))))),
            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;
} // Predicates = [HasNEON, HasV8_1a]

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// null_frag here: selection is from the saddsat/vqdmull combination below.
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

let Predicates = [HasNEON] in {
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                       (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                       (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                  (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                       imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                  (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                       imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
                          v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
                          v8f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
                              v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
                              v8f16, v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Q-register mls-by-lane patterns, mirroring the VMLA patterns above.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                 (fmul_su (v4f32 QPR:$src2),
                          (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", ARMvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// null_frag here: selection is from the ssubsat/vqdmull combination below.
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

let Predicates = [HasNEON] in {
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                       (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                       (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                  (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                       imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                  (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                       imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
                          v4f16, fmul, fadd>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
                          v8f16, fmul, fadd>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
                          v4f16, fmul, fsub>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
                          v8f16, fmul, fsub>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
// fma with a negated multiplicand maps onto the fused-subtract forms.
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;

// ARMv8.2a dot product instructions.
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
// encodings are the same and thus no further bit twiddling is necessary
// in the disassembler.
class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
           string AsmTy, ValueType AccumTy, ValueType InputTy,
           SDPatternOperator OpNode> :
      N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
            Asm, AsmTy,
            [(set (AccumTy RegTy:$dst),
                  (OpNode (AccumTy RegTy:$Vd),
                          (InputTy RegTy:$Vn),
                          (InputTy RegTy:$Vm)))]> {
  let Predicates = [HasDotProd];
  let DecoderNamespace = "VFPV8";
  let Constraints = "$dst = $Vd";
}

def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;

// Indexed dot product instructions:
multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
                ValueType AccumType, ValueType InputType,
                SDPatternOperator OpNode, dag RHS> {
  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
    bit lane;
    let Inst{5} = lane;
    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
    let Constraints = "$dst = $Vd";
    let Predicates = [HasDotProd];
    let DecoderNamespace = "VFPV8";
  }

  // The scalar operand is matched as a duplicated lane of the accumulator
  // element type, bitcast to the byte input type.
  def : Pat<
    (AccumType (OpNode (AccumType Ty:$Vd),
                       (InputType Ty:$Vn),
                       (InputType (bitconvert (AccumType
                                  (ARMvduplane (AccumType Ty:$Vm),
                                                 VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
}

defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// v8.6A matrix multiplication extension
let Predicates = [HasMatMulInt8] in {
  class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
                  SDPatternOperator OpNode>
      : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
              (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
              Asm, AsmTy,
              [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
                                              (v16i8 QPR:$Vn),
                                              (v16i8 QPR:$Vm)))]> {
    let DecoderNamespace = "VFPV8";
    let Constraints = "$dst = $Vd";
  }

  multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy,
                             RegisterClass RegTy, ValueType AccumTy,
                             ValueType InputTy, SDPatternOperator OpNode,
                             dag RHS> {

    def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
                   (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm,
                    VectorIndex32:$lane), N3RegFrm,
                   NoItinerary, Asm, AsmTy, []> {
      bit lane;
      let Inst{5} = lane;
      let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
      let DecoderNamespace = "VFPV8";
      let Constraints = "$dst = $Vd";
    }

    def : Pat<
      (AccumTy (OpNode (AccumTy RegTy:$Vd),
                       (InputTy RegTy:$Vn),
                       (InputTy (bitconvert (AccumTy
                                (ARMvduplane (AccumTy RegTy:$Vm),
                                               VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                VectorIndex32:$lane)>;

  }

  // VSUDOT (signed * unsigned-scalar): the inherited multiclass gets
  // null_frag for both OpNode and RHS; the real pattern is supplied below
  // with the usdot operands swapped so the scalar comes from $Vm.
  multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy,
                       ValueType InputTy, dag RHS>
      : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy,
                        null_frag, null_frag> {
    def : Pat<
      (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
                                   (InputTy (bitconvert (AccumTy
                                            (ARMvduplane (AccumTy RegTy:$Vm),
                                                           VectorIndex32:$lane)))),
                                   (InputTy RegTy:$Vn))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS,
                                VectorIndex32:$lane)>;
  }

  def VSMMLA  : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>;
  def VUMMLA  : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>;
  def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
  def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8,
                     int_arm_neon_usdot>;
  def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8,
                     int_arm_neon_usdot>;

  defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
                                  int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
  defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
                                  int_arm_neon_usdot,
                                  (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
  defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8,
                            (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
} // Predicates = [HasMatMulInt8]

// ARMv8.3 complex operations
class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
                            InstrItinClass itin, dag oops, dag iops,
                            string opc, string dt, list<dag> pattern>
  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
  bits<2> rot;
  let Inst{24-23} = rot;
}

class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
                           InstrItinClass itin, dag oops, dag iops, string opc,
                           string dt, list<dag> pattern>
  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
  bits<1> rot;
  let Inst{24} = rot;
}

class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = lane;
}

class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = Vm{4};
  // This is needed because the lane operand does not have any bits in the
  // encoding (it only has one possible value), so we need to manually set it
  // to its default value.
  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
}

multiclass N3VCP8ComplexTied<bit op21, bit op4,
                             string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  }
}

multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
                            string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  }
}

// These instructions index by pairs of lanes, so the VectorIndexes are twice
// as wide as the data types.
multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
                                 SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  }
}

defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;

// VCADD rotation selector: 0 encodes the #90 rotation, 1 encodes #270
// (see the rot90/rot270 intrinsics matched below).
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
}
let Predicates = [HasNEON,HasV8_3a] in {
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
}

// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
                     v4f16, v4f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
                     v8f16, v8f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zanyext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", ssubsat, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", usubsat, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// null_frag: matched from the trunc(shr(sub)) patterns below instead.
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Comparisons.
// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>;
def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                      ARMCCeq, 1>;
def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                      ARMCCeq, 1>;
def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
                      ARMCCeq, 1>,
             Requires<[HasNEON, HasFullFP16]>;
def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
                      ARMCCeq, 1>,
             Requires<[HasNEON, HasFullFP16]>;

// Compare-against-zero forms.
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                         "$Vd, $Vm, #0", ARMCCeq>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>;
defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>;
def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                      ARMCCge, 0>;
def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                      ARMCCge, 0>;
def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
                      ARMCCge, 0>,
             Requires<[HasNEON, HasFullFP16]>;
def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
                      ARMCCge, 0>,
             Requires<[HasNEON, HasFullFP16]>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                         "$Vd, $Vm, #0", ARMCCge>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                         "$Vd, $Vm, #0", ARMCCle>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      ARMCCgt, 0>;
def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                      ARMCCgt, 0>;
def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
                      ARMCCgt, 0>,
             Requires<[HasNEON, HasFullFP16]>;
def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
                      ARMCCgt, 0>,
             Requires<[HasNEON, HasFullFP16]>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                         "$Vd, $Vm, #0", ARMCCgt>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                         "$Vd, $Vm, #0", ARMCClt>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                     IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// vaclt/vacle have no encodings of their own: they are assembler aliases for
// vacgt/vacge with the $Vn and $Vm source operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// +fp16fml Floating Point Multiplication Variants
let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
           asm, "f16", "$Vd, $Vn, $Vm", "", []>;

class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
             asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Vd, Vs, Vs[0-15], Idx[0-1]
class VFMD<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bit idx;
  let Inst{3} = idx;
  let Inst{19-16} = Vn{4-1};
  let Inst{7} = Vn{0};
  let Inst{5} = Vm{0};
  let Inst{2-0} = Vm{3-1};
}

// Vq, Vd, Vd[0-7], Idx[0-3]
class VFMQ<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
}

//                                                op1   op2   op3
def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
} // HasNEON, HasFP16FML


// Two-operand forms of the vaclt/vacle aliases: $Vd doubles as the first
// source operand (same swap onto vacgt/vacge as the three-operand forms).
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm,
DPR:$Vd, pred:$p)>; 5255def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", 5256 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 5257} 5258 5259// Vector Bitwise Operations. 5260 5261def vnotd : PatFrag<(ops node:$in), 5262 (xor node:$in, ARMimmAllOnesD)>; 5263def vnotq : PatFrag<(ops node:$in), 5264 (xor node:$in, ARMimmAllOnesV)>; 5265 5266 5267// VAND : Vector Bitwise AND 5268def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", 5269 v2i32, v2i32, and, 1>; 5270def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", 5271 v4i32, v4i32, and, 1>; 5272 5273// VEOR : Vector Bitwise Exclusive OR 5274def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", 5275 v2i32, v2i32, xor, 1>; 5276def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", 5277 v4i32, v4i32, xor, 1>; 5278 5279// VORR : Vector Bitwise OR 5280def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", 5281 v2i32, v2i32, or, 1>; 5282def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", 5283 v4i32, v4i32, or, 1>; 5284 5285def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, 5286 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 5287 IIC_VMOVImm, 5288 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", 5289 [(set DPR:$Vd, 5290 (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> { 5291 let Inst{9} = SIMM{9}; 5292} 5293 5294def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, 5295 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), 5296 IIC_VMOVImm, 5297 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", 5298 [(set DPR:$Vd, 5299 (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> { 5300 let Inst{10-9} = SIMM{10-9}; 5301} 5302 5303def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, 5304 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), 5305 IIC_VMOVImm, 5306 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", 5307 [(set QPR:$Vd, 5308 (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> { 5309 let Inst{9} = SIMM{9}; 5310} 5311 5312def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, 5313 (outs QPR:$Vd), 
(ins nImmSplatI32:$SIMM, QPR:$src), 5314 IIC_VMOVImm, 5315 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", 5316 [(set QPR:$Vd, 5317 (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> { 5318 let Inst{10-9} = SIMM{10-9}; 5319} 5320 5321 5322// VBIC : Vector Bitwise Bit Clear (AND NOT) 5323let TwoOperandAliasConstraint = "$Vn = $Vd" in { 5324def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 5325 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 5326 "vbic", "$Vd, $Vn, $Vm", "", 5327 [(set DPR:$Vd, (v2i32 (and DPR:$Vn, 5328 (vnotd DPR:$Vm))))]>; 5329def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 5330 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 5331 "vbic", "$Vd, $Vn, $Vm", "", 5332 [(set QPR:$Vd, (v4i32 (and QPR:$Vn, 5333 (vnotq QPR:$Vm))))]>; 5334} 5335 5336def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, 5337 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 5338 IIC_VMOVImm, 5339 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 5340 [(set DPR:$Vd, 5341 (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> { 5342 let Inst{9} = SIMM{9}; 5343} 5344 5345def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, 5346 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), 5347 IIC_VMOVImm, 5348 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 5349 [(set DPR:$Vd, 5350 (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> { 5351 let Inst{10-9} = SIMM{10-9}; 5352} 5353 5354def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, 5355 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), 5356 IIC_VMOVImm, 5357 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 5358 [(set QPR:$Vd, 5359 (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> { 5360 let Inst{9} = SIMM{9}; 5361} 5362 5363def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, 5364 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), 5365 IIC_VMOVImm, 5366 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 5367 [(set QPR:$Vd, 5368 (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> { 5369 let Inst{10-9} = SIMM{10-9}; 5370} 5371 5372// VORN : Vector Bitwise 
OR NOT 5373def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), 5374 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 5375 "vorn", "$Vd, $Vn, $Vm", "", 5376 [(set DPR:$Vd, (v2i32 (or DPR:$Vn, 5377 (vnotd DPR:$Vm))))]>; 5378def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), 5379 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 5380 "vorn", "$Vd, $Vn, $Vm", "", 5381 [(set QPR:$Vd, (v4i32 (or QPR:$Vn, 5382 (vnotq QPR:$Vm))))]>; 5383 5384// VMVN : Vector Bitwise NOT (Immediate) 5385 5386let isReMaterializable = 1 in { 5387 5388def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), 5389 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5390 "vmvn", "i16", "$Vd, $SIMM", "", 5391 [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> { 5392 let Inst{9} = SIMM{9}; 5393} 5394 5395def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), 5396 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5397 "vmvn", "i16", "$Vd, $SIMM", "", 5398 [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> { 5399 let Inst{9} = SIMM{9}; 5400} 5401 5402def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), 5403 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5404 "vmvn", "i32", "$Vd, $SIMM", "", 5405 [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> { 5406 let Inst{11-8} = SIMM{11-8}; 5407} 5408 5409def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), 5410 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5411 "vmvn", "i32", "$Vd, $SIMM", "", 5412 [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> { 5413 let Inst{11-8} = SIMM{11-8}; 5414} 5415} 5416 5417// VMVN : Vector Bitwise NOT 5418def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, 5419 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, 5420 "vmvn", "$Vd, $Vm", "", 5421 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; 5422def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, 5423 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, 5424 "vmvn", "$Vd, $Vm", "", 5425 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; 5426let Predicates = 
[HasNEON] in { 5427def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; 5428def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; 5429} 5430 5431// The TwoAddress pass will not go looking for equivalent operations 5432// with different register constraints; it just inserts copies. 5433// That is why pseudo VBSP implemented. Is is expanded later into 5434// VBIT/VBIF/VBSL taking into account register constraints to avoid copies. 5435def VBSPd 5436 : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5437 IIC_VBINiD, "", 5438 [(set DPR:$Vd, 5439 (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; 5440let Predicates = [HasNEON] in { 5441def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), 5442 (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), 5443 (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5444def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), 5445 (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), 5446 (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5447def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), 5448 (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), 5449 (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5450def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), 5451 (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), 5452 (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5453def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), 5454 (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), 5455 (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5456 5457def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), 5458 (and DPR:$Vm, (vnotd DPR:$Vd)))), 5459 (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; 5460 5461def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), 5462 (and DPR:$Vm, (vnotd DPR:$Vd)))), 5463 (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; 5464} 5465 5466def VBSPq 5467 : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5468 IIC_VBINiQ, "", 5469 [(set QPR:$Vd, 5470 (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; 5471let Predicates = [HasNEON] in { 5472def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), 5473 (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), 5474 (VBSPq QPR:$src1, QPR:$Vn, 
QPR:$Vm)>; 5475def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), 5476 (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), 5477 (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5478def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), 5479 (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), 5480 (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5481def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), 5482 (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), 5483 (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5484def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), 5485 (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), 5486 (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5487 5488def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), 5489 (and QPR:$Vm, (vnotq QPR:$Vd)))), 5490 (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; 5491def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), 5492 (and QPR:$Vm, (vnotq QPR:$Vd)))), 5493 (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; 5494} 5495 5496// VBSL : Vector Bitwise Select 5497def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 5498 (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5499 N3RegFrm, IIC_VBINiD, 5500 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5501 []>; 5502 5503def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 5504 (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5505 N3RegFrm, IIC_VBINiQ, 5506 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5507 []>; 5508 5509// VBIF : Vector Bitwise Insert if False 5510// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", 5511def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, 5512 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5513 N3RegFrm, IIC_VBINiD, 5514 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5515 []>; 5516def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, 5517 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5518 N3RegFrm, IIC_VBINiQ, 5519 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5520 []>; 5521 5522// VBIT : Vector Bitwise Insert if True 5523// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", 5524def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, 5525 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5526 N3RegFrm, 
IIC_VBINiD, 5527 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5528 []>; 5529def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, 5530 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5531 N3RegFrm, IIC_VBINiQ, 5532 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5533 []>; 5534 5535// Vector Absolute Differences. 5536 5537// VABD : Vector Absolute Difference 5538defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, 5539 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5540 "vabd", "s", int_arm_neon_vabds, 1>; 5541defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, 5542 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5543 "vabd", "u", int_arm_neon_vabdu, 1>; 5544def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, 5545 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; 5546def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, 5547 "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; 5548def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND, 5549 "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>, 5550 Requires<[HasNEON, HasFullFP16]>; 5551def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ, 5552 "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>, 5553 Requires<[HasNEON, HasFullFP16]>; 5554 5555// VABDL : Vector Absolute Difference Long (Q = | D - D |) 5556defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, 5557 "vabdl", "s", int_arm_neon_vabds, zext, 1>; 5558defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, 5559 "vabdl", "u", int_arm_neon_vabdu, zext, 1>; 5560 5561let Predicates = [HasNEON] in { 5562def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))), 5563 (VABDLuv8i16 DPR:$opA, DPR:$opB)>; 5564def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))), 5565 (VABDLuv4i32 DPR:$opA, DPR:$opB)>; 5566} 5567 5568// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the 5569// shift/xor pattern for ABS. 
5570 5571def abd_shr : 5572 PatFrag<(ops node:$in1, node:$in2, node:$shift), 5573 (ARMvshrsImm (sub (zext node:$in1), 5574 (zext node:$in2)), (i32 $shift))>; 5575 5576let Predicates = [HasNEON] in { 5577def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), 5578 (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), 5579 (zext (v2i32 DPR:$opB))), 5580 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), 5581 (VABDLuv2i64 DPR:$opA, DPR:$opB)>; 5582} 5583 5584// VABA : Vector Absolute Difference and Accumulate 5585defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5586 "vaba", "s", int_arm_neon_vabds, add>; 5587defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5588 "vaba", "u", int_arm_neon_vabdu, add>; 5589 5590// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) 5591defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, 5592 "vabal", "s", int_arm_neon_vabds, zext, add>; 5593defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, 5594 "vabal", "u", int_arm_neon_vabdu, zext, add>; 5595 5596// Vector Maximum and Minimum. 
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", umax, 1>;
// FP forms select fmaximum/fminimum; contrast with fmaxnum/fminnum used by
// the v8 VMAXNM/VMINNM instructions below.
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmax", "f32",
                     v2f32, v2f32, fmaximum, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmax", "f32",
                     v4f32, v4f32, fmaximum, 1>;
def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmax", "f16",
                     v4f16, v4f16, fmaximum, 1>,
             Requires<[HasNEON, HasFullFP16]>;
def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmax", "f16",
                     v8f16, v8f16, fmaximum, 1>,
             Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v2f32, v2f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v4f32, v4f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v4f16, v4f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v8f16, v8f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmin", "f32",
                     v2f32, v2f32, fminimum, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmin", "f32",
                     v4f32, v4f32, fminimum, 1>;
def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmin", "f16",
                     v4f16, v4f16, fminimum, 1>,
             Requires<[HasNEON, HasFullFP16]>;
def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmin", "f16",
                     v8f16, v8f16, fminimum, 1>,
             Requires<[HasNEON, HasFullFP16]>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v2f32, v2f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v4f32, v4f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v4f16, v4f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v8f16, v8f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
// Pairwise ops are D-register only; there are no Q-register variants here.
def VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i8",
                       v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i16",
                       v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i32",
                       v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                       IIC_VPBIND, "vpadd", "f32",
                       v2f32, v2f32, int_arm_neon_vpadd, 0>;
def VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                       IIC_VPBIND, "vpadd", "f16",
                       v4f16, v4f16, int_arm_neon_vpadd, 0>,
               Requires<[HasNEON, HasFullFP16]>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                            int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                            int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                             int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                             int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                       "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                       "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
               Requires<[HasNEON, HasFullFP16]>;

// VPMIN : Vector Pairwise Minimum
def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                       "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                       "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
               Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                       IIC_VUNAD, "vrecpe", "u32",
                       v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                       IIC_VUNAQ, "vrecpe", "u32",
                       v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                       IIC_VUNAD, "vrecpe", "f32",
                       v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                       IIC_VUNAQ, "vrecpe", "f32",
                       v4f32, v4f32, int_arm_neon_vrecpe>;
def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                       IIC_VUNAD, "vrecpe", "f16",
                       v4f16, v4f16, int_arm_neon_vrecpe>,
               Requires<[HasNEON, HasFullFP16]>;
def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                       IIC_VUNAQ, "vrecpe", "f16",
                       v8f16, v8f16, int_arm_neon_vrecpe>,
               Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
// Newton-Raphson refinement step used together with VRECPE.
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                       IIC_VRECSD, "vrecps", "f32",
                       v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                       IIC_VRECSQ, "vrecps", "f32",
                       v4f32, v4f32, int_arm_neon_vrecps, 1>;
def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                       IIC_VRECSD, "vrecps", "f16",
                       v4f16, v4f16, int_arm_neon_vrecps, 1>,
               Requires<[HasNEON, HasFullFP16]>;
def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                       IIC_VRECSQ, "vrecps", "f16",
                       v8f16, v8f16, int_arm_neon_vrecps, 1>,
               Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                        IIC_VUNAD, "vrsqrte", "u32",
                        v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                        IIC_VUNAQ, "vrsqrte", "u32",
                        v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                        IIC_VUNAD, "vrsqrte", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                        IIC_VUNAQ, "vrsqrte", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrte>;
def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                        IIC_VUNAD, "vrsqrte", "f16",
                        v4f16, v4f16, int_arm_neon_vrsqrte>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                        IIC_VUNAQ, "vrsqrte", "f16",
                        v8f16, v8f16, int_arm_neon_vrsqrte>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f16",
                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f16",
                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.
// VSHL : Vector Shift
// Register-shift forms; the shift amount is per-lane and signed (a negative
// amount shifts right) — see the ARM ARM VSHL (register) description.
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL : Vector Shift Left (Immediate)
defm VSHLi  : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs  : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                          ARMvshrsImm>;
defm VSHRu  : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                          ARMvshruImm>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// Shift-by-element-size has a distinct encoding; op21_16 is fixed per element
// size and decoding is handled specially.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                         v8i16, v8i8, imm8>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                         v4i32, v4i16, imm16>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                         v2i64, v2i32, imm32>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN : Vector Shift Right and Narrow
defm VSHRN  : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                         PatFrag<(ops node:$Rn, node:$amt),
                                 (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

// The truncation discards the shifted-in bits, so the unsigned-shift form is
// equivalent and can reuse the same instruction.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                          NEONvrshrsImm>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                          NEONvrshruImm>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                         NEONvrshrnImm>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrnsImm>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnuImm>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsuImm>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnuImm>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsuImm>;

// VSRA : Vector Shift Right and Accumulate
6003defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>; 6004defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>; 6005// VRSRA : Vector Rounding Shift Right and Accumulate 6006defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>; 6007defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>; 6008 6009// VSLI : Vector Shift Left and Insert 6010defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; 6011 6012// VSRI : Vector Shift Right and Insert 6013defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; 6014 6015// Vector Absolute and Saturating Absolute. 6016 6017// VABS : Vector Absolute Value 6018defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 6019 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>; 6020def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 6021 "vabs", "f32", 6022 v2f32, v2f32, fabs>; 6023def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 6024 "vabs", "f32", 6025 v4f32, v4f32, fabs>; 6026def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 6027 "vabs", "f16", 6028 v4f16, v4f16, fabs>, 6029 Requires<[HasNEON, HasFullFP16]>; 6030def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 6031 "vabs", "f16", 6032 v8f16, v8f16, fabs>, 6033 Requires<[HasNEON, HasFullFP16]>; 6034 6035// VQABS : Vector Saturating Absolute Value 6036defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 6037 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", 6038 int_arm_neon_vqabs>; 6039 6040// Vector Negate. 
// Negation expressed as a subtraction from the all-zeros vector, for
// D- and Q-sized operands respectively.
def vnegd : PatFrag<(ops node:$in),
                    (sub ARMimmAllZerosD, node:$in)>;
def vnegq : PatFrag<(ops node:$in),
                    (sub ARMimmAllZerosV, node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                 "vneg", "f32", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                   "vneg", "f32", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                 "vneg", "f16", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
             Requires<[HasNEON, HasFullFP16]>;
def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                 "vneg", "f16", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
             Requires<[HasNEON, HasFullFP16]>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
}

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                       ctlz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiD, "vcnt", "8",
                    v8i8, v8i8, ctpop>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiQ, "vcnt", "8",
                    v16i8, v16i8, ctpop>;

// Vector Swap
// No selection pattern: both outputs are tied to the inputs via constraints.
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                 []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                 []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
// Assembler aliases only: a register-to-register vmov is encoded as a
// VORR of the source with itself.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Although VMOVs are not strictly speaking cheap, they are as expensive
// as their copies counterpart (VORR), so we should prefer rematerialization
// over splitting when it applies.
let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                        (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                        "vmov", "i8", "$Vd, $SIMM", "",
                        [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Add support for bytes replication feature, so it could be GAS compatible.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are incorrect, but we could deal with such cases.
  // For last two instructions, for example, it should emit:
  // "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // Also add same support for VMVN instructions. So instruction:
  // "vmvn.i32 d0, #0xabababab"
  // actually means:
  // "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// Similar to above for types other than i8, e.g.:
// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In this case we do not canonicalize VMVN to VMOV
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                 VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                                     imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                                     imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
// 32-bit lane extraction is only selected here when the target has a fast
// VGETLN.32; the slow-VGETLNi32 targets use the subreg-copy patterns below.
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]>,
                Requires<[HasFPRegs, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
}
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
let Predicates = [HasNEON] in {
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
}

// Even-numbered f16 lanes line up with the low half of an S register, so an
// even-lane extract is just a subregister extract.
multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
}

// Odd lanes are reached by extracting the containing S register and using
// VMOVH to move its high half down.
multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
}

let Predicates = [HasNEON] in {
  defm : ExtractEltEvenF16<v4f16, v8f16>;
  defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
}

let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
  // If VMOVH (vmovx.f16) is available use it to extract BF16 from the odd lanes
  defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
}

let Predicates = [HasBF16, HasNEON] in {
  defm : ExtractEltEvenF16<v4bf16, v8bf16>;

  // Otherwise, if VMOVH is not available resort to extracting the odd lane
  // into a GPR and then moving to HPR
  def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
              HPR)>;

  def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane)),
              HPR)>;
}

// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent as
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}

// TODO: for odd lanes we could optimize this a bit by using the VINS
// FullFP16 instruction when it is available
multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
  def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT4 (VSETLNi16 DPR:$src1,
                            (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
  def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT8 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                              (DSubReg_i16_reg imm:$lane))),
                                     (COPY_TO_REGCLASS HPR:$src2, GPR),
                                     (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
}

let Predicates = [HasNEON] in {
// Q-register lane inserts go through the containing D subregister since the
// VSETLN instructions only address D registers.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                   (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                           (DSubReg_i8_reg imm:$lane))),
                                   GPR:$src2, (SubReg_i8_lane imm:$lane))),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                              (DSubReg_i16_reg imm:$lane))),
                                     GPR:$src2, (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                   (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                              (DSubReg_i32_reg imm:$lane))),
                                     GPR:$src2, (SubReg_i32_lane imm:$lane))),
                   (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

defm : InsertEltF16<f16, v4f16, v8f16>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6492def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), 6493 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 6494def : Pat<(v4f32 (scalar_to_vector SPR:$src)), 6495 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; 6496 6497def : Pat<(v8i8 (scalar_to_vector GPR:$src)), 6498 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 6499def : Pat<(v4i16 (scalar_to_vector GPR:$src)), 6500 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 6501def : Pat<(v2i32 (scalar_to_vector GPR:$src)), 6502 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 6503 6504def : Pat<(v16i8 (scalar_to_vector GPR:$src)), 6505 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6506 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 6507 dsub_0)>; 6508def : Pat<(v8i16 (scalar_to_vector GPR:$src)), 6509 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 6510 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 6511 dsub_0)>; 6512def : Pat<(v4i32 (scalar_to_vector GPR:$src)), 6513 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 6514 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 6515 dsub_0)>; 6516} 6517 6518let Predicates = [HasNEON, HasBF16] in 6519defm : InsertEltF16<bf16, v4bf16, v8bf16>; 6520 6521// VDUP : Vector Duplicate (from ARM core register to all elements) 6522 6523class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> 6524 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R), 6525 IIC_VMOVIS, "vdup", Dt, "$V, $R", 6526 [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>; 6527class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> 6528 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R), 6529 IIC_VMOVIS, "vdup", Dt, "$V, $R", 6530 [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>; 6531 6532def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; 6533def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; 6534def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, 6535 Requires<[HasNEON, HasFastVDUP32]>; 6536def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; 
6537def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; 6538def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; 6539 6540// ARMvdup patterns for uarchs with fast VDUP.32. 6541def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, 6542 Requires<[HasNEON,HasFastVDUP32]>; 6543def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>, 6544 Requires<[HasNEON]>; 6545 6546// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. 6547def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, 6548 Requires<[HasNEON,HasSlowVDUP32]>; 6549def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, 6550 Requires<[HasNEON,HasSlowVDUP32]>; 6551 6552// VDUP : Vector Duplicate Lane (from scalar to all elements) 6553 6554class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, 6555 ValueType Ty, Operand IdxTy> 6556 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 6557 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", 6558 [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>; 6559 6560class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, 6561 ValueType ResTy, ValueType OpTy, Operand IdxTy> 6562 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 6563 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", 6564 [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm), 6565 VectorIndex32:$lane)))]>; 6566 6567// Inst{19-16} is partially specified depending on the element size. 
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

let Predicates = [HasNEON] in {
// NOTE(review): the v4f16 duplane selects the 32-bit lane dup (VDUPLN32d),
// i.e. an f16 lane pair is duplicated — confirm the lane-index scaling is
// handled by the caller producing this node.
def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: dup from the D subregister that
// contains the requested lane.
def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                                 (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Duplicating an FP scalar: insert it at lane 0 of an undefined register,
// then dup that lane.
def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                                           (f16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                           SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                           SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                                           (f16 HPR:$src), ssub_0), (i32 0)))>;
}

let Predicates = [HasNEON, HasBF16] in {
def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;

def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
          (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
                                                     (DSubReg_i16_reg imm:$lane))),
                             (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
          (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                                            (bf16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
          (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
                                            (bf16 HPR:$src), ssub_0), (i32 0)))>;
}

// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                      "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                           "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                           "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

let Predicates = [HasNEON] in {
// anyext widening is lowered with the unsigned (zero-extending) VMOVL.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
}

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v2f32, v2i32, uint_to_fp>;

def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v4f32, v4i32, uint_to_fp>;

def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                    v4i16, v4f16, fp_to_sint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                    v4i16, v4f16, fp_to_uint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                    v4f16, v4i16, sint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                    v4f16, v4i16, uint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;

def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                    v8i16, v8f16, fp_to_sint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                    v8i16, v8f16, fp_to_uint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                    v8f16, v8i16, sint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                    v8f16, v8i16, uint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;

// VCVT{A, N, P, M}
// Rounding-mode conversions (ARMv8): signed/unsigned x D/Q x f32/f16.
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
  def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
  def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                      "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
  def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                      "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s16.f16", v4i16, v4f16, IntS>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                      "s16.f16", v8i16, v8f16, IntS>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                      "u16.f16", v4i16, v4f16, IntU>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                      "u16.f16", v8i16, v8f16, IntU>,
            Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

// Assembler aliases: a fixed-point VCVT with "#0" fraction bits is spelled
// as the plain integer<->FP conversion.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def :
NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", 6828 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6829def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0", 6830 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6831def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0", 6832 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6833 6834 6835// VCVT : Vector Convert Between Half-Precision and Single-Precision. 6836def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 6837 IIC_VUNAQ, "vcvt", "f16.f32", 6838 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 6839 Requires<[HasNEON, HasFP16]>; 6840def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 6841 IIC_VUNAQ, "vcvt", "f32.f16", 6842 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 6843 Requires<[HasNEON, HasFP16]>; 6844 6845// Vector Reverse. 6846 6847// VREV64 : Vector Reverse elements within 64-bit doublewords 6848 6849class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6850 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), 6851 (ins DPR:$Vm), IIC_VMOVD, 6852 OpcodeStr, Dt, "$Vd, $Vm", "", 6853 [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>; 6854class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6855 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), 6856 (ins QPR:$Vm), IIC_VMOVQ, 6857 OpcodeStr, Dt, "$Vd, $Vm", "", 6858 [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>; 6859 6860def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; 6861def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; 6862def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; 6863let Predicates = [HasNEON] in { 6864def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; 6865} 6866 6867def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; 6868def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; 6869def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; 6870 6871let Predicates = [HasNEON] in { 6872 def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), 6873 (VREV64q32 QPR:$Vm)>; 6874 def : Pat<(v8f16 (ARMvrev64 
(v8f16 QPR:$Vm))), 6875 (VREV64q16 QPR:$Vm)>; 6876 def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), 6877 (VREV64d16 DPR:$Vm)>; 6878} 6879 6880// VREV32 : Vector Reverse elements within 32-bit words 6881 6882class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6883 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), 6884 (ins DPR:$Vm), IIC_VMOVD, 6885 OpcodeStr, Dt, "$Vd, $Vm", "", 6886 [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>; 6887class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6888 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), 6889 (ins QPR:$Vm), IIC_VMOVQ, 6890 OpcodeStr, Dt, "$Vd, $Vm", "", 6891 [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>; 6892 6893def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; 6894def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; 6895 6896def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; 6897def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; 6898 6899let Predicates = [HasNEON] in { 6900 def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))), 6901 (VREV32q16 QPR:$Vm)>; 6902 def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))), 6903 (VREV32d16 DPR:$Vm)>; 6904} 6905 6906// VREV16 : Vector Reverse elements within 16-bit halfwords 6907 6908class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6909 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd), 6910 (ins DPR:$Vm), IIC_VMOVD, 6911 OpcodeStr, Dt, "$Vd, $Vm", "", 6912 [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>; 6913class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6914 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd), 6915 (ins QPR:$Vm), IIC_VMOVQ, 6916 OpcodeStr, Dt, "$Vd, $Vm", "", 6917 [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>; 6918 6919def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; 6920def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; 6921 6922// Other Vector Shuffles. 

// Aligned extractions: really just dropping registers

// Extracting an aligned half of a Q register is free: select the low or high
// D subregister. LaneCVT converts the starting lane index into the right
// dsub_* subregister index for the element width.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
      Requires<[HasNEON]>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;

def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>;      // v8f16 -> v4f16

// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// D-register VEXT: 3-bit byte-oriented immediate index field in Inst{10-8}.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// Q-register VEXT: 4-bit immediate index field in Inst{11-8}.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// Per-element-size defs: the element index is scaled into the byte-oriented
// encoding field, so wider elements force the low field bits to zero.
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8} = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10} = index{0};
  let Inst{9-8} = 0b00;
}
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8} = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8} = 0b000;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

// Multi-register table forms consume a consecutive register list.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos take the table as one QQPR super-register; the selection patterns
// below build that operand with REG_SEQUENCE. Presumably expanded to the
// real VTBL3/VTBL4 later — confirm in the pseudo-expansion pass.
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Like VTBL, but out-of-range lane indices leave the original destination
// element intact, hence the "$orig = $Vd" tied-operand constraint.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// Selection patterns for the multi-register table lookups: gather the table
// registers into a DPair/QQPR tuple, padding 3-register tables with an
// IMPLICIT_DEF in the last slot.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBX3Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
}

// VRINT : Vector Rounding
// ARMv8 vector round-to-integral-FP in a specific rounding mode; the mode is
// selected by the op9_7 encoding bits and matched via the intrinsic.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f32",
                     v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f32",
                     v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f16",
                     v4f16, v4f16, Int>,
           Requires<[HasV8, HasNEON, HasFullFP16]> {
    let Inst{9-7} = op9_7;
  }
  def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f16",
                     v8f16, v8f16, Int>,
           Requires<[HasV8, HasNEON, HasFullFP16]> {
    let Inst{9-7} = op9_7;
  }
  }

  // Accept the long-form "vrintX.f32.f32"/"vrintX.f16.f16" spellings.
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;

// SHA1H/SHA1C/SHA1M/SHA1P use null_frag here because their intrinsics take a
// scalar i32 hash operand; custom selection patterns below do the matching.
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

// Bridge the scalar i32 hash operand of the SHA1 intrinsics into lane 0 of a
// Q register (and back out for sha1h) via SPR copies and subregister ops.
let Predicates = [HasNEON] in {
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;
}

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These pattern classes implement scalar f32/f16 operations on the NEON
// vector units: the scalar is inserted into lane 0 of an undefined D
// register, the vector instruction runs, and lane 0 is extracted back.
// DPR_VFP2 restricts allocation to D registers whose S subregs exist.

// Unary scalar f32 op via a v2f32 NEON instruction.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

// Binary scalar f32 op via a v2f32 NEON instruction.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Binary scalar f16 op via a v4f16 NEON instruction.
class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
               (v4f16 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                  HPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                  HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Scalar f32 multiply-accumulate (acc op (a * b)) via a 3-input NEON op.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$acc, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Scalar int -> f32 conversion via a v2f32 NEON convert.
class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;
// Scalar f32 -> int conversion via a v2f32 NEON convert.
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
7365def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 7366 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7367def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 7368 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7369def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 7370 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7371def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 7372 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7373 7374 7375// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 7376def : Pat<(f32 (bitconvert GPR:$a)), 7377 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 7378 Requires<[HasNEON, DontUseVMOVSR]>; 7379def : Pat<(arm_vmovsr GPR:$a), 7380 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 7381 Requires<[HasNEON, DontUseVMOVSR]>; 7382 7383//===----------------------------------------------------------------------===// 7384// Non-Instruction Patterns or Endianess - Revert Patterns 7385//===----------------------------------------------------------------------===// 7386 7387// bit_convert 7388// 64 bit conversions 7389let Predicates = [HasNEON] in { 7390def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; 7391def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; 7392 7393def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; 7394def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; 7395 7396def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>; 7397def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>; 7398 7399def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16 DPR:$src)>; 7400def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16 DPR:$src)>; 7401 7402// 128 bit conversions 7403def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; 7404def : Pat<(v2i64 
(bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; 7405 7406def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; 7407def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; 7408 7409def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>; 7410def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>; 7411 7412def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16 QPR:$src)>; 7413def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16 QPR:$src)>; 7414} 7415 7416let Predicates = [IsLE,HasNEON] in { 7417 // 64 bit conversions 7418 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; 7419 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; 7420 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>; 7421 def : Pat<(f64 (bitconvert (v4bf16 DPR:$src))), (f64 DPR:$src)>; 7422 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; 7423 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; 7424 7425 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; 7426 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; 7427 def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>; 7428 def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>; 7429 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; 7430 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; 7431 7432 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; 7433 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; 7434 def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>; 7435 def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>; 7436 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; 7437 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; 7438 7439 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; 7440 def : Pat<(v2i32 (bitconvert (v1i64 
DPR:$src))), (v2i32 DPR:$src)>; 7441 def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>; 7442 def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>; 7443 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; 7444 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; 7445 7446 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>; 7447 def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>; 7448 def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>; 7449 def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>; 7450 def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (v4f16 DPR:$src)>; 7451 7452 def : Pat<(v4bf16 (bitconvert (f64 DPR:$src))), (v4bf16 DPR:$src)>; 7453 def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>; 7454 def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>; 7455 def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>; 7456 def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (v4bf16 DPR:$src)>; 7457 7458 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; 7459 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; 7460 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; 7461 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; 7462 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; 7463 7464 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; 7465 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; 7466 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; 7467 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; 7468 def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>; 7469 def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (v8i8 DPR:$src)>; 7470 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; 7471 7472 // 128 bit conversions 7473 def : Pat<(v2f64 
(bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; 7474 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; 7475 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>; 7476 def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>; 7477 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; 7478 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; 7479 7480 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; 7481 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; 7482 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>; 7483 def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>; 7484 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; 7485 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; 7486 7487 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; 7488 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; 7489 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>; 7490 def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>; 7491 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; 7492 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; 7493 7494 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; 7495 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; 7496 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>; 7497 def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>; 7498 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; 7499 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; 7500 7501 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>; 7502 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>; 7503 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>; 7504 def : Pat<(v8f16 
(bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>; 7505 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>; 7506 7507 def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>; 7508 def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>; 7509 def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>; 7510 def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>; 7511 def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>; 7512 7513 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; 7514 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; 7515 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; 7516 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; 7517 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; 7518 7519 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; 7520 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; 7521 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; 7522 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; 7523 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>; 7524 def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>; 7525 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; 7526} 7527 7528let Predicates = [IsBE,HasNEON] in { 7529 // 64 bit conversions 7530 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; 7531 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; 7532 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>; 7533 def : Pat<(f64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>; 7534 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; 7535 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; 7536 7537 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 
DPR:$src)>; 7538 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; 7539 def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>; 7540 def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>; 7541 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; 7542 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; 7543 7544 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; 7545 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; 7546 def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>; 7547 def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>; 7548 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 7549 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; 7550 7551 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; 7552 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; 7553 def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>; 7554 def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>; 7555 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 7556 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; 7557 7558 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; 7559 def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; 7560 def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; 7561 def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; 7562 def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 7563 7564 def : Pat<(v4bf16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; 7565 def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; 7566 def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; 7567 def : Pat<(v4bf16 (bitconvert (v2i32 
DPR:$src))), (VREV32d16 DPR:$src)>; 7568 def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 7569 7570 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; 7571 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; 7572 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; 7573 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; 7574 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 7575 7576 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; 7577 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; 7578 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; 7579 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; 7580 def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>; 7581 def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (VREV16d8 DPR:$src)>; 7582 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; 7583 7584 // 128 bit conversions 7585 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 7586 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 7587 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; 7588 def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>; 7589 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 7590 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 7591 7592 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 7593 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 7594 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; 7595 def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>; 7596 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 7597 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 7598 7599 
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 7600 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 7601 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; 7602 def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>; 7603 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 7604 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 7605 7606 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 7607 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 7608 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; 7609 def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>; 7610 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 7611 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 7612 7613 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7614 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7615 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7616 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; 7617 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7618 7619 def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7620 def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7621 def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7622 def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; 7623 def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7624 7625 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7626 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7627 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7628 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), 
(VREV32q16 QPR:$src)>; 7629 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7630 7631 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; 7632 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; 7633 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; 7634 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; 7635 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>; 7636 def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8 QPR:$src)>; 7637 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; 7638} 7639 7640let Predicates = [HasNEON] in { 7641 // Here we match the specific SDNode type 'ARMVectorRegCastImpl' 7642 // rather than the more general 'ARMVectorRegCast' which would also 7643 // match some bitconverts. If we use the latter in cases where the 7644 // input and output types are the same, the bitconvert gets elided 7645 // and we end up generating a nonsense match of nothing. 
7646 7647 foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7648 foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7649 def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>; 7650 7651 foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in 7652 foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in 7653 def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>; 7654} 7655 7656// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian 7657let Predicates = [IsBE,HasNEON] in { 7658def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 7659 (VREV64q8 (VLD1q8 addrmode6:$addr))>; 7660def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 7661 (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>; 7662def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 7663 (VREV64q16 (VLD1q16 addrmode6:$addr))>; 7664def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 7665 (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>; 7666} 7667 7668// Fold extracting an element out of a v2i32 into a vfp register. 7669def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), 7670 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>, 7671 Requires<[HasNEON]>; 7672 7673// Vector lengthening move with load, matching extending loads. 7674 7675// extload, zextload and sextload for a standard lengthening load. 
Example: 7676// Lengthen_Single<"8", "i16", "8"> = 7677// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) 7678// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, 7679// (f64 (IMPLICIT_DEF)), (i32 0)))>; 7680multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { 7681 let AddedComplexity = 10 in { 7682 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7683 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)), 7684 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 7685 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>, 7686 Requires<[HasNEON]>; 7687 7688 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7689 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)), 7690 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 7691 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>, 7692 Requires<[HasNEON]>; 7693 7694 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7695 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)), 7696 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) 7697 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>, 7698 Requires<[HasNEON]>; 7699 } 7700} 7701 7702// extload, zextload and sextload for a lengthening load which only uses 7703// half the lanes available. 
Example: 7704// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = 7705// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), 7706// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, 7707// (f64 (IMPLICIT_DEF)), (i32 0))), 7708// dsub_0)>; 7709multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, 7710 string InsnLanes, string InsnTy> { 7711 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7712 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 7713 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 7714 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7715 dsub_0)>, 7716 Requires<[HasNEON]>; 7717 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7718 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 7719 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 7720 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7721 dsub_0)>, 7722 Requires<[HasNEON]>; 7723 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7724 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 7725 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) 7726 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7727 dsub_0)>, 7728 Requires<[HasNEON]>; 7729} 7730 7731// The following class definition is basically a copy of the 7732// Lengthen_HalfSingle definition above, however with an additional parameter 7733// "RevLanes" to select the correct VREV32dXX instruction. This is to convert 7734// data loaded by VLD1LN into proper vector format in big endian mode. 
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                                          string InsnLanes, string InsnTy, string RevLanes> {
  // Any-extending load: the widened bits are undefined, so the unsigned
  // lengthening move (VMOVLu) is used. The VREV32 puts the lanes loaded by
  // VLD1LNd32 into vector register order for big-endian targets.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
      (!cast<Instruction>("VREV32d" # RevLanes)
        (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
      dsub_0)>,
             Requires<[HasNEON]>;
  // Zero-extending load: also lowered with the unsigned lengthening move.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
      (!cast<Instruction>("VREV32d" # RevLanes)
        (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
      dsub_0)>,
           Requires<[HasNEON]>;
  // Sign-extending load: uses the signed lengthening move (VMOVLs).
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
      (!cast<Instruction>("VREV32d" # RevLanes)
        (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
      dsub_0)>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//     (EXTRACT_SUBREG (VMOVLuv4i32
//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                     (f64 (IMPLICIT_DEF)),
//                                     (i32 0))),
//                       dsub_0)),
//       dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  // Any-extending load: both lengthening steps use the unsigned VMOVL,
  // since the widened bits are undefined for an extload.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
    (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
        dsub_0))>,
             Requires<[HasNEON]>;
  // Zero-extending load: both lengthening steps use the unsigned VMOVL.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
    (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
        dsub_0))>,
           Requires<[HasNEON]>;
  // Sign-extending load: both lengthening steps use the signed VMOVL.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
    (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
        (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
        dsub_0))>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  // Any-extending load: a VREV32 first puts the data loaded by VLD1LNd32
  // into vector lane order, then both lengthening steps use the unsigned
  // VMOVL (the widened bits are undefined for an extload).
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
    (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (!cast<Instruction>("VREV32d" # RevLanes)
          (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
        dsub_0))>,
             Requires<[HasNEON]>;
  // Zero-extending load: both lengthening steps use the unsigned VMOVL.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
    (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (!cast<Instruction>("VREV32d" # RevLanes)
          (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
        dsub_0))>,
           Requires<[HasNEON]>;
  // Sign-extending load: both lengthening steps use the signed VMOVL.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
    (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
        (!cast<Instruction>("VREV32d" # RevLanes)
          (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
        dsub_0))>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//     (EXTRACT_SUBREG (VMOVLuv4i32
//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                     (f64 (IMPLICIT_DEF)), (i32 0))),
//                       dsub_0)),
//       dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  // Any-extending load: both lengthening steps use the unsigned VMOVL,
  // since the widened bits are undefined for an extload. The final
  // EXTRACT_SUBREG keeps only the low D register of the 128-bit result.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
        dsub_0)),
      dsub_0)>,
             Requires<[HasNEON]>;
  // Zero-extending load: both lengthening steps use the unsigned VMOVL.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
        dsub_0)),
      dsub_0)>,
           Requires<[HasNEON]>;
  // Sign-extending load: both lengthening steps use the signed VMOVL.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
        (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
        dsub_0)),
      dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  // Any-extending load: VREV16d8 puts the bytes loaded by VLD1LNd16 into
  // vector lane order for big-endian targets; both lengthening steps then
  // use the unsigned VMOVL (the widened bits are undefined for an extload).
  // VREV16d8 is a fixed instruction here, so it is referenced directly
  // instead of through !cast on a literal string.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (VREV16d8
          (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
        dsub_0)),
      dsub_0)>,
             Requires<[HasNEON]>;
  // Zero-extending load: both lengthening steps use the unsigned VMOVL.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
        (VREV16d8
          (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
        dsub_0)),
      dsub_0)>,
           Requires<[HasNEON]>;
  // Sign-extending load: both lengthening steps use the signed VMOVL.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
    (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
      (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
        (VREV16d8
          (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
        dsub_0)),
      dsub_0)>,
           Requires<[HasNEON]>;
}

// Single lengthening loads use all lanes of a full D register.
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [HasNEON,IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [HasNEON,IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
  // VREV16d8 is a fixed instruction in these patterns, so it is referenced
  // directly instead of through !cast on a literal string.
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VREV16d8
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VREV16d8
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (VREV16d8
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}

// Concatenating two 64-bit vectors is free: the two D registers are simply
// placed into the low (dsub_0) and high (dsub_1) halves of a Q register.
let Predicates = [HasNEON] in {
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates
// "vand.iNN $Vd, #imm" is accepted by encoding it as VBIC of the inverted
// immediate; the nImmSplatNotINN operand classes handle the inversion.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                  pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                  pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                  pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                  pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                  pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                  pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                  pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                  pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                  pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                  rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                  rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                  rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                  rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                  rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;

def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;

def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// VST4 multiple-structure assembly pseudo-instructions. All element sizes
// share the same address operand class (addrmode6align64or128or256); only
// the list operand distinguishes the d-register and q-register forms.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Fixed-writeback ("$addr!") variants.
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list,
                    addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Register-writeback ("$addr, $Rm") variants.
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix.
// A register-to-register "vmov" is encoded as VORR with both source
// operands equal.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// (Comment corrected: it previously said VCLT/VCGT, but the aliases that
// follow map "vcle" onto VCGE instructions.)
// D-register versions.
// VCLE aliases: each one swaps the $Dn/$Dm (or $Qn/$Qm) operands when
// mapping onto VCGE, i.e. "vcle a, b, c" is emitted as "vcge a, c, b".
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The f16 form additionally requires full half-precision support.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// VCLT aliases: operand-swapped mappings onto VCGT, i.e. "vclt a, b, c"
// is emitted as "vcgt a, c, b".
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The f16 form additionally requires full half-precision support.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
// VSWP with any (or no) datatype suffix maps onto the untyped instruction.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
// NOTE(review): nImmVMOVI32Neg presumably matches immediates whose
// complemented bit pattern is a valid VMOV modified-immediate, allowing
// vmov<->vmvn substitution — confirm against the asm parser's
// NEONi32vmovNeg matcher.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
// Each "xxxq" mnemonic simply re-parses as "xxx"; operand register classes
// then select the correct D/Q variant.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
let Predicates = [HasBF16, HasNEON] in {
// Common base for the BFloat16 dot-product instructions; all decode in the
// "VFPV8" namespace.
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
   : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
           N3RegFrm, IIC_VDOTPROD, "", "", pattern>
{
  let DecoderNamespace = "VFPV8";
}

// VDOT (vector form): accumulating bf16 dot product, selected via the
// int_arm_neon_bfdot intrinsic. $dst is tied to the accumulator input $Vd.
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
   : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
          [(set (AccumTy RegTy:$dst),
                (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                    (InputTy RegTy:$Vn),
                                    (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

// VDOT (by-element form): $Vm is a d register with a 32-bit lane selector
// encoded in instruction bit 5. The pattern matches a bfdot whose second
// input is a duplicated lane; RHS supplies the $Vm operand expression for
// the selected instruction.
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                     ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                         DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                 (InputTy RegTy:$Vn),
                                 (InputTy (bitconvert (AccumTy
                                    (ARMvduplane (AccumTy RegTy:$Vm),
                                                  VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

// For the d form the duplicated register is already a d register; for the q
// form the low d sub-register is extracted.
defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// VMMLA: bf16 matrix multiply-accumulate via int_arm_neon_bfmmla.
// NOTE(review): the pattern is written against QPR/v4f32/v8bf16 directly
// rather than the RegTy/Q parameters, so this class is only meaningful for
// the single Q-register instantiation below.
class BF16MM<bit Q, RegisterClass RegTy,
             string opc>
   : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
           (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
           N3RegFrm, IIC_VDOTPROD, "", "",
           [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                                        (v8bf16 QPR:$Vn),
                                                        (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;

// VFMAB/VFMAT (vector form): bf16 widening multiply-accumulate into f32;
// T selects the "t" (int_arm_neon_bfmlalt) or "b" (int_arm_neon_bfmlalb)
// variant via the suffix/OpNode pairings below.
class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
                [(set (v4f32 QPR:$dst),
                      (OpNode (v4f32 QPR:$Vd),
                              (v8bf16 QPR:$Vn),
                              (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

// VFMAB/VFMAT (by-element form): the 2-bit lane index $idx is split across
// instruction bits {5} (high bit) and {3} (low bit).
multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
            (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
            IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
    bits<2> idx;
    let Inst{5} = idx{1};
    let Inst{3} = idx{0};
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  // Match a lane-duplicated multiply-accumulate onto the indexed form,
  // extracting the d sub-register that contains the selected lane.
  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v8bf16 QPR:$Vn),
                   (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                                        VectorIndex16:$lane)))),
    (!cast<Instruction>(NAME) QPR:$Vd,
                              QPR:$Vn,
                              (EXTRACT_SUBREG QPR:$Vm,
                                              (DSubReg_i16_reg VectorIndex16:$lane)),
                              (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

// VCVT bf16.f32: q-register f32 source, d-register bf16 destination.
// No ISel pattern is attached here.
def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                    (outs DPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "",
                    []>;
}
// End of BFloat16 instructions