//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand, printed symbolically by the
// instruction printer.
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

// Splat immediates for each element size (i8/i16/i32), each paired with the
// asm-parser operand class that validates the encoding.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// "Not" variants: immediates accepted because their bitwise complement is a
// valid splat (used to select VMVN-style encodings).
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Parameterized asm-operand classes for VMOV/VMVN immediates that are
// replicated from a From-sized element to a To-sized element.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}

// Negated i32 VMOV immediate (selects the complementary encoding).
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.F32.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Lane-index operands. The valid range shrinks as the element size grows:
// 8 lanes of i8, 4 of i16, 2 of i32, 1 of i64 per D register.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                         "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// PatFrags that select loads/stores by their recorded alignment, so patterns
// can pick the NEON or VFP form that matches the access's alignment guarantee.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVTST    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVTST>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
// NOTE(review): several nodes below use SDTARMVSHIMM, which is not defined in
// this chunk — presumably it is declared earlier in the file; verify.
def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                            SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                            SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

// Bitwise select: result lanes come from operand 2 or 3 under the mask in
// operand 1; all four types must match.
def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Two-result shuffles (VZIP/VUZP/VTRN) produce a pair of vectors of the
// same type as their two inputs.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


// Match an ARMvmovImm whose decoded modified-immediate is an all-zeros
// 32-bit-element splat.
def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Match an ARMvmovImm whose decoded modified-immediate is an all-ones
// 8-bit-element splat (i.e. every byte is 0xff).
def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d8QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>,
Sched<[WriteVLD4]>; 855def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 856def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 857def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 858def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 859def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 860 861// ...with double-spaced registers 862def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 863 addrmode6align64or128>, Sched<[WriteVLD2]>; 864def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 865 addrmode6align64or128>, Sched<[WriteVLD2]>; 866def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 867 addrmode6align64or128>, Sched<[WriteVLD2]>; 868defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 869 addrmode6align64or128>, Sched<[WriteVLD2]>; 870defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 871 addrmode6align64or128>, Sched<[WriteVLD2]>; 872defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 873 addrmode6align64or128>, Sched<[WriteVLD2]>; 874 875// VLD3 : Vector Load (multiple 3-element structures) 876class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 877 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 878 (ins addrmode6:$Rn), IIC_VLD3, 879 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { 880 let Rm = 0b1111; 881 let Inst{4} = Rn{4}; 882 let DecoderMethod = "DecodeVLDST3Instruction"; 883} 884 885def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 886def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 887def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 888 889def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 890def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 891def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, 
Sched<[WriteVLD3]>; 892 893// ...with address register writeback: 894class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 895 : NLdSt<0, 0b10, op11_8, op7_4, 896 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 897 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 898 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 899 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 900 let Inst{4} = Rn{4}; 901 let DecoderMethod = "DecodeVLDST3Instruction"; 902} 903 904def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 905def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 906def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 907 908def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 909def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 910def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 911 912// ...with double-spaced registers: 913def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 914def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 915def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 916def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 917def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 918def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 919 920def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 921def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 922def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 923 924// ...alternate versions to be allocated odd register numbers: 925def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 926def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 927def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 928 929def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 930def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 931def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 932 933// VLD4 : Vector Load (multiple 4-element structures) 934class 
VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
    Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers:
def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin,
                "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
                                                   Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v4f16 DPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr,
DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8f16 QPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4bf16 DPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8bf16 QPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
}


let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3",
            []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []>, Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
    Sched<[WriteVLD2]> {
  let Rm =
0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

let Predicates = [HasNEON] in {
def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
}

class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

let Predicates = [HasNEON] in {
def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
}

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                                addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} =
Rn{4}; 1592 let DecoderMethod = "DecodeVLD4DupInstruction"; 1593} 1594 1595def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; 1596def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; 1597def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1598 1599def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1600def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1601def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1602 1603// ...with double-spaced registers (not used for codegen): 1604def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; 1605def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; 1606def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1607 1608def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1609def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1610def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1611def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1612def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1613def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1614 1615// ...with address register writeback: 1616class VLD4DUPWB<bits<4> op7_4, string Dt> 1617 : NLdSt<1, 0b10, 0b1111, op7_4, 1618 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1619 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, 1620 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", 1621 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 1622 let Inst{4} = Rn{4}; 1623 let DecoderMethod = "DecodeVLD4DupInstruction"; 1624} 1625 1626def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; 1627def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; 1628def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1629 1630def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; 1631def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; 1632def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1633 1634def 
VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1635def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1636def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1637 1638} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1639 1640let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 1641 1642// Classes for VST* pseudo-instructions with multi-register operands. 1643// These are expanded to real instructions after register allocation. 1644class VSTQPseudo<InstrItinClass itin> 1645 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1646class VSTQWBPseudo<InstrItinClass itin> 1647 : PseudoNLdSt<(outs GPR:$wb), 1648 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1649 "$addr.addr = $wb">; 1650class VSTQWBfixedPseudo<InstrItinClass itin> 1651 : PseudoNLdSt<(outs GPR:$wb), 1652 (ins addrmode6:$addr, QPR:$src), itin, 1653 "$addr.addr = $wb">; 1654class VSTQWBregisterPseudo<InstrItinClass itin> 1655 : PseudoNLdSt<(outs GPR:$wb), 1656 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1657 "$addr.addr = $wb">; 1658class VSTQQPseudo<InstrItinClass itin> 1659 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1660class VSTQQWBPseudo<InstrItinClass itin> 1661 : PseudoNLdSt<(outs GPR:$wb), 1662 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1663 "$addr.addr = $wb">; 1664class VSTQQWBfixedPseudo<InstrItinClass itin> 1665 : PseudoNLdSt<(outs GPR:$wb), 1666 (ins addrmode6:$addr, QQPR:$src), itin, 1667 "$addr.addr = $wb">; 1668class VSTQQWBregisterPseudo<InstrItinClass itin> 1669 : PseudoNLdSt<(outs GPR:$wb), 1670 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1671 "$addr.addr = $wb">; 1672 1673class VSTQQQQPseudo<InstrItinClass itin> 1674 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1675class VSTQQQQWBPseudo<InstrItinClass itin> 1676 : PseudoNLdSt<(outs GPR:$wb), 1677 (ins addrmode6:$addr, 
am6offset:$offset, QQQQPR:$src), itin, 1678 "$addr.addr = $wb">; 1679 1680// VST1 : Vector Store (multiple single elements) 1681class VST1D<bits<4> op7_4, string Dt, Operand AddrMode> 1682 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), 1683 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { 1684 let Rm = 0b1111; 1685 let Inst{4} = Rn{4}; 1686 let DecoderMethod = "DecodeVLDST1Instruction"; 1687} 1688class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode> 1689 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), 1690 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { 1691 let Rm = 0b1111; 1692 let Inst{5-4} = Rn{5-4}; 1693 let DecoderMethod = "DecodeVLDST1Instruction"; 1694} 1695 1696def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; 1697def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; 1698def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; 1699def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; 1700 1701def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; 1702def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; 1703def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; 1704def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; 1705 1706// ...with address register writeback: 1707multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1708 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), 1709 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, 1710 "vst1", Dt, "$Vd, $Rn!", 1711 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { 1712 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1713 let Inst{4} = Rn{4}; 1714 let DecoderMethod = "DecodeVLDST1Instruction"; 1715 } 1716 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), 1717 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), 1718 IIC_VLD1u, 1719 "vst1", Dt, "$Vd, $Rn, $Rm", 1720 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { 1721 let Inst{4} = Rn{4}; 1722 let DecoderMethod = "DecodeVLDST1Instruction"; 1723 } 1724} 1725multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1726 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1727 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, 1728 "vst1", Dt, "$Vd, $Rn!", 1729 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1730 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1731 let Inst{5-4} = Rn{5-4}; 1732 let DecoderMethod = "DecodeVLDST1Instruction"; 1733 } 1734 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1735 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), 1736 IIC_VLD1x2u, 1737 "vst1", Dt, "$Vd, $Rn, $Rm", 1738 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1739 let Inst{5-4} = Rn{5-4}; 1740 let DecoderMethod = "DecodeVLDST1Instruction"; 1741 } 1742} 1743 1744defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; 1745defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; 1746defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; 1747defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; 1748 1749defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 1750defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 1751defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 1752defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 1753 1754// ...with 3 registers 1755class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode> 1756 : NLdSt<0, 0b00, 0b0110, op7_4, (outs), 1757 (ins AddrMode:$Rn, VecListThreeD:$Vd), 1758 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { 1759 let Rm = 0b1111; 1760 let Inst{4} = Rn{4}; 1761 let DecoderMethod 
= "DecodeVLDST1Instruction"; 1762} 1763multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1764 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1765 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, 1766 "vst1", Dt, "$Vd, $Rn!", 1767 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 1768 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1769 let Inst{5-4} = Rn{5-4}; 1770 let DecoderMethod = "DecodeVLDST1Instruction"; 1771 } 1772 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1773 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), 1774 IIC_VLD1x3u, 1775 "vst1", Dt, "$Vd, $Rn, $Rm", 1776 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 1777 let Inst{5-4} = Rn{5-4}; 1778 let DecoderMethod = "DecodeVLDST1Instruction"; 1779 } 1780} 1781 1782def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; 1783def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; 1784def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; 1785def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; 1786 1787defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; 1788defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; 1789defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; 1790defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; 1791 1792def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1793def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1794def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1795def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1796def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>; 1797def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>; 1798 1799def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1800def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1801def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1802def 
VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1803def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1804def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1805def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1806def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1807 1808// ...with 4 registers 1809class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode> 1810 : NLdSt<0, 0b00, 0b0010, op7_4, (outs), 1811 (ins AddrMode:$Rn, VecListFourD:$Vd), 1812 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", 1813 []>, Sched<[WriteVST4]> { 1814 let Rm = 0b1111; 1815 let Inst{5-4} = Rn{5-4}; 1816 let DecoderMethod = "DecodeVLDST1Instruction"; 1817} 1818multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1819 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1820 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, 1821 "vst1", Dt, "$Vd, $Rn!", 1822 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1823 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1824 let Inst{5-4} = Rn{5-4}; 1825 let DecoderMethod = "DecodeVLDST1Instruction"; 1826 } 1827 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1828 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1829 IIC_VLD1x4u, 1830 "vst1", Dt, "$Vd, $Rn, $Rm", 1831 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1832 let Inst{5-4} = Rn{5-4}; 1833 let DecoderMethod = "DecodeVLDST1Instruction"; 1834 } 1835} 1836 1837def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 1838def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 1839def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 1840def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 1841 1842defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1843defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1844defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1845defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 1846 1847def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1848def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1849def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1850def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1851def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>; 1852def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>; 1853 1854def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1855def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1856def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1857def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1858def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1859def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1860def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, 
Sched<[WriteVST4]>; 1861def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1862 1863// VST2 : Vector Store (multiple 2-element structures) 1864class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 1865 InstrItinClass itin, Operand AddrMode> 1866 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), 1867 itin, "vst2", Dt, "$Vd, $Rn", "", []> { 1868 let Rm = 0b1111; 1869 let Inst{5-4} = Rn{5-4}; 1870 let DecoderMethod = "DecodeVLDST2Instruction"; 1871} 1872 1873def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, 1874 addrmode6align64or128>, Sched<[WriteVST2]>; 1875def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, 1876 addrmode6align64or128>, Sched<[WriteVST2]>; 1877def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, 1878 addrmode6align64or128>, Sched<[WriteVST2]>; 1879 1880def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, 1881 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1882def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, 1883 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1884def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, 1885 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1886 1887def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1888def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1889def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1890 1891// ...with address register writeback: 1892multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, 1893 RegisterOperand VdTy, Operand AddrMode> { 1894 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1895 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, 1896 "vst2", Dt, "$Vd, $Rn!", 1897 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1898 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1899 let Inst{5-4} = Rn{5-4}; 1900 let DecoderMethod = "DecodeVLDST2Instruction"; 1901 } 1902 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1903 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, 1904 "vst2", Dt, "$Vd, $Rn, $Rm", 1905 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1906 let Inst{5-4} = Rn{5-4}; 1907 let DecoderMethod = "DecodeVLDST2Instruction"; 1908 } 1909} 1910multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1911 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1912 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, 1913 "vst2", Dt, "$Vd, $Rn!", 1914 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1915 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1916 let Inst{5-4} = Rn{5-4}; 1917 let DecoderMethod = "DecodeVLDST2Instruction"; 1918 } 1919 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1920 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1921 IIC_VLD1u, 1922 "vst2", Dt, "$Vd, $Rn, $Rm", 1923 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1924 let Inst{5-4} = Rn{5-4}; 1925 let DecoderMethod = "DecodeVLDST2Instruction"; 1926 } 1927} 1928 1929defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, 1930 addrmode6align64or128>; 1931defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, 1932 addrmode6align64or128>; 1933defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, 1934 addrmode6align64or128>; 1935 1936defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1937defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1938defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1939 1940def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1941def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1942def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1943def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, 
Sched<[WriteVST4]>; 1944def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1945def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1946 1947// ...with double-spaced registers 1948def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, 1949 addrmode6align64or128>; 1950def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, 1951 addrmode6align64or128>; 1952def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, 1953 addrmode6align64or128>; 1954defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, 1955 addrmode6align64or128>; 1956defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, 1957 addrmode6align64or128>; 1958defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, 1959 addrmode6align64or128>; 1960 1961// VST3 : Vector Store (multiple 3-element structures) 1962class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 1963 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1964 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, 1965 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { 1966 let Rm = 0b1111; 1967 let Inst{4} = Rn{4}; 1968 let DecoderMethod = "DecodeVLDST3Instruction"; 1969} 1970 1971def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; 1972def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; 1973def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; 1974 1975def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1976def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1977def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1978 1979// ...with address register writeback: 1980class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1981 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1982 (ins addrmode6:$Rn, am6offset:$Rm, 1983 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, 1984 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", 1985 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 
1986 let Inst{4} = Rn{4}; 1987 let DecoderMethod = "DecodeVLDST3Instruction"; 1988} 1989 1990def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; 1991def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; 1992def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; 1993 1994def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 1995def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 1996def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 1997 1998// ...with double-spaced registers: 1999def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; 2000def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; 2001def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; 2002def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; 2003def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; 2004def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; 2005 2006def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2007def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2008def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2009 2010// ...alternate versions to be allocated odd register numbers: 2011def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2012def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2013def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2014 2015def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2016def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2017def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2018 2019// VST4 : Vector Store (multiple 4-element structures) 2020class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 2021 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 2022 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), 2023 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", 2024 "", []>, Sched<[WriteVST4]> { 2025 let Rm = 0b1111; 2026 let Inst{5-4} = Rn{5-4}; 2027 let 
DecoderMethod = "DecodeVLDST4Instruction"; 2028} 2029 2030def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; 2031def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; 2032def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; 2033 2034def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2035def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2036def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2037 2038// ...with address register writeback: 2039class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2040 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 2041 (ins addrmode6:$Rn, am6offset:$Rm, 2042 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, 2043 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", 2044 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 2045 let Inst{5-4} = Rn{5-4}; 2046 let DecoderMethod = "DecodeVLDST4Instruction"; 2047} 2048 2049def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; 2050def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; 2051def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; 2052 2053def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2054def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2055def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2056 2057// ...with double-spaced registers: 2058def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; 2059def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; 2060def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; 2061def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; 2062def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; 2063def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; 2064 2065def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2066def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2067def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2068 2069// ...alternate versions to be allocated odd register numbers: 2070def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 
2071def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2072def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2073 2074def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2075def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2076def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2077 2078} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2079 2080// Classes for VST*LN pseudo-instructions with multi-register operands. 2081// These are expanded to real instructions after register allocation. 2082class VSTQLNPseudo<InstrItinClass itin> 2083 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 2084 itin, "">; 2085class VSTQLNWBPseudo<InstrItinClass itin> 2086 : PseudoNLdSt<(outs GPR:$wb), 2087 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 2088 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2089class VSTQQLNPseudo<InstrItinClass itin> 2090 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 2091 itin, "">; 2092class VSTQQLNWBPseudo<InstrItinClass itin> 2093 : PseudoNLdSt<(outs GPR:$wb), 2094 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 2095 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2096class VSTQQQQLNPseudo<InstrItinClass itin> 2097 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 2098 itin, "">; 2099class VSTQQQQLNWBPseudo<InstrItinClass itin> 2100 : PseudoNLdSt<(outs GPR:$wb), 2101 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 2102 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2103 2104// VST1LN : Vector Store (single element from one lane) 2105class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2106 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> 2107 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2108 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), 2109 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", 2110 [(StoreOp (ExtractOp (Ty 
DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, 2111 Sched<[WriteVST1]> { 2112 let Rm = 0b1111; 2113 let DecoderMethod = "DecodeVST1LN"; 2114} 2115class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 2116 : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> { 2117 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2118 addrmode6:$addr)]; 2119} 2120 2121def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, 2122 ARMvgetlaneu, addrmode6> { 2123 let Inst{7-5} = lane{2-0}; 2124} 2125def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, 2126 ARMvgetlaneu, addrmode6> { 2127 let Inst{7-6} = lane{1-0}; 2128 let Inst{4} = Rn{4}; 2129} 2130 2131def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, 2132 addrmode6oneL32> { 2133 let Inst{7} = lane{0}; 2134 let Inst{5-4} = Rn{5-4}; 2135} 2136 2137def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>; 2138def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>; 2139def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>; 2140 2141let Predicates = [HasNEON] in { 2142def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), 2143 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 2144def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), 2145 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 2146 2147def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr), 2148 (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>; 2149def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr), 2150 (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 2151} 2152 2153// ...with address register writeback: 2154class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2155 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> 2156 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2157 (ins AdrMode:$Rn, am6offset:$Rm, 2158 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, 
// Tail of class VST1LNWB (the class header, with its encoding/operand
// parameters, is above this chunk): single-lane "vst1" with address-register
// writeback; $wb returns the updated base address ("$Rn.addr = $wb").
                     "vst1", Dt,
           "\\{$Vd[$lane]\\}, $Rn$Rm",
           "$Rn.addr = $wb",
           [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                   AdrMode:$Rn, am6offset:$Rm))]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
// Writeback pseudo for storing one lane of a Q register; carries only the
// selection pattern.  NOTE(review): presumably lowered to the D-register
// *_UPD form later -- the VSTQLNWBPseudo base is outside this chunk; confirm.
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

// Element-size variants.  The unconstrained '?' bits in op7_4 are filled by
// the per-def 'let Inst{...}' assignments: the lane number, plus (for the
// wider elements) bits taken from the addrmode6 operand $Rn -- NOTE(review):
// presumably the alignment encoding from the NLdStLn base; confirm there.
def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST1]> {
  // Fixed Rm: this is the no-writeback form (cf. VST2LNWB below).
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

// NOTE(review): unlike the sibling lane-store pseudos, these two carry no
// Sched<[...]> annotation -- possibly an oversight; confirm before changing.
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5}   = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5}   = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5}   = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5}   = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
let Predicates = [IsLE,HasNEON] in {
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>;
}
// Big-endian uses the .64 element size for any non-word alignment instead of
// the .8/.16 forms above.  NOTE(review): presumably because smaller element
// sizes would reorder bytes on BE; confirm against the IsBE lowering rules.
let Predicates = [IsBE,HasNEON] in {
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>;
}

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
let Predicates = [HasNEON] in {
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
}
let Predicates = [IsLE,HasNEON] in {
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>;
}

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// N2VD/N2VQ: single-source vector op matched via an SDNode (OpNode).  The
// D and Q forms differ only in the hard-coded 0/1 size bit passed to N2V and
// in the register class / itinerary used.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ, but matched via an intrinsic (SDPatternOperator)
// and with a caller-supplied itinerary.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Unpredicated 2-register intrinsics (N2Vnp base, no condition-code operand).
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to N2VQIntnp with some more encoding bits exposed (crypto):
// op6 is a class parameter here instead of being hard-wired to 1.
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  // The extra source must be allocated to the destination register.
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// Narrowing unary op: Q-register source, D-register result, matched via an
// SDNode.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// Widening: D-register source, Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers of the pair are rewritten in place, so each output is tied
// to the corresponding input ($src1 = $Vd, $src2 = $Vm).  No selection
// pattern -- these are matched elsewhere.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// By-scalar (lane-indexed) variant: the second source is one lane of a D
// register, splatted via ARMvduplane.  NOTE(review): DPR_VFP2 / DPR_8
// restrict which D registers the scalar may use -- presumably the encodable
// subset for by-scalar forms; confirm against the register-class defs.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Unpredicated variant (N3Vnp base).
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" variant: lists / prints the sources as $Vm, $Vn (swapped relative to
// N3VDInt), and ties the alias constraint to $Vm accordingly.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  // The extra source must be allocated to the destination register.
  let Constraints = "$src = $Vd";
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" variant: sources listed / printed as $Vm, $Vn (swapped), as with
// N3VDIntSh above.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// Pattern shape: Vd = OpNode(src1, MulOp(Vn, Vm)), with the accumulator
// tied to the destination ($src1 = $Vd).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                          imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                          imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                                   imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                                   imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Pattern shape: Vd = OpNode(src1, IntOp(Vn, Vm)) -- intrinsic result
// combined with the tied accumulator ($src1 = $Vd).
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator (tied to $Vd), D-register multiplicands:
// Vd = OpNode(src1, MulOp(Vn, Vm)).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                          imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                          imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Vd = OpNode(src1, ExtOp(IntOp(Vn, Vm))): the D-sized intrinsic result is
// widened by ExtOp before being combined with the tied Q accumulator.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: Dd = IntOp(Qn, Qm), quad sources to a
// double destination.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations: Qd = OpNode(Dn, Dm), double sources to a
// quad destination.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long by-scalar operation, 32-bit elements: Qd = OpNode(Dn, dup(Dm[lane])).
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Long by-scalar operation, 16-bit elements (DPR_8 scalar register class).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Long 3-register operation with explicit extends:
// Qd = OpNode(ExtOp(Dn), ExtOp(Dm)) — both operands widened before OpNode.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Qd = ExtOp(IntOp(Dn, Dm)) — the double-width intrinsic result is widened.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
                 SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics: Qd = IntOp(Dn, Dm).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Unpredicated long 3-register intrinsic (N3Vnp encoding).
// NOTE(review): the Commutable parameter is accepted but not applied here
// (no "let isCommutable" in this class) — confirm whether that is intended.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Long by-scalar intrinsic, 32-bit elements: Qd = IntOp(Dn, dup(Dm[lane])).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Long by-scalar intrinsic, 16-bit elements (DPR_8 scalar register class).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Wide 3-register operations: Qd = OpNode(Qn, ExtOp(Dm)) — only the second
// source is widened; the first is already quad-width.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long 2-register accumulate, double-register form:
// Dd = IntOp(Dd, Dm), destination tied to the accumulator input.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register form of the above: Qd = IntOp(Qd, Qm).
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register shift by immediate: Dd = OpNode(Dm, #imm).
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register shift by immediate: Qd = OpNode(Qm, #imm).
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Long shift by immediate: Qd = OpNode(Dm, #imm), widening double to quad.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate: Dd = OpNode(Qm, #imm), narrowing quad to double.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift right and accumulate, double-register:
// Dd = Dd + ShOp(Dm, #imm), destination tied to the accumulator input.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Quad-register form: Qd = Qd + ShOp(Qm, #imm).
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift and insert, double-register: Dd = ShOp(Dd, Dm, #imm); the tied
// $src1 supplies the bits preserved by the insert.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register form: Qd = ShOp(Qd, Qm, #imm).
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// Fixed-point convert, double-register: Dd = IntOp(Dm, #fbits).
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Fixed-point convert, quad-register: Qd = IntOp(Qm, #fbits).
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, PatFrag fc> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
  // Floating-point variants produce an integer mask of the same lane shape.
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register comparisons.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// 3-register comparisons over 8/16/32-bit integer element sizes,
// both double- and quad-register.
multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       PatFrag fc, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, fc, Commutable>;
  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, v4i16, fc, Commutable>;
  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v2i32, v2i32, fc, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v16i8, v16i8, fc, Commutable>;
  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v8i16, fc, Commutable>;
  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, fc, Commutable>;
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-scalar 3-register operations, 16- and 32-bit elements,
// both double- and quad-register.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same as N3VInt_HS but built on the N3VDIntSh/N3VQIntSh classes.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// By-scalar 3-register intrinsics, 16- and 32-bit elements,
// both double- and quad-register.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// 8-bit extension of N3VInt_HSSh (N3VDIntSh/N3VQIntSh classes).
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// 64-bit extension of N3VInt_QHSSh (N3VDIntSh/N3VQIntSh classes).
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Long by-scalar operations, 16- and 32-bit source elements.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations with explicitly extended operands (N3VLExt classes).
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Long by-scalar intrinsics, 16- and 32-bit source elements.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// Long intrinsics whose result is widened with an explicit ExtOp
// (used for VABDL: ExtOp applied to the D-sized intrinsic result).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
// (wide ops combine a Q-reg first operand with an ExtOp-extended D-reg
// second operand, e.g. VADDW below).
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// (fused pattern OpNode(acc, mul(a, b)) — used for integer VMLA/VMLS).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Multiply-op with a by-scalar (lane) second multiplicand; 16- and 32-bit
// elements only, both D- and Q-register forms.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// (fused pattern OpNode(acc, IntOp(a, b))).
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// element sizes of 8, 16 and 32 bits:
// (inherits the 16/32-bit defs from N3VInt3_HS and adds the 8-bit ones)
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// (pattern OpNode(Q-acc, MulOp(D, D)) — used for VMLAL/VMLSL).
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Long multiply-op with a by-scalar (lane) multiplicand; 16/32-bit only.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Long 3-argument by-scalar (lane) intrinsics; 16/32-bit only.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Long intrinsic-op with explicit extend: OpNode(Q-acc, ExtOp(IntOp(D, D))).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
// Note the result has half the lane count at double the width
// (e.g. v8i8 source -> v4i16 result).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// The `let Inst{...}` overrides pin the upper bits of the imm6 field so the
// encoder/decoder can recover the element size from the immediate.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift variant: uses the shr_imm* operand classes (restricted
// immediate ranges) and the N2RegVShRFrm form instead of i32imm/VShL.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// Left variant (VSLI): i32imm shift operand, NEONvsliImm node.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                             // imm6 = xxxxxx
}
// Right variant (VSRI): shr_imm* shift operand, NEONvsriImm node.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
// Shift amount must be strictly less than the source element width
// (imm1_7 / imm1_15 / imm1_31 operand classes).
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
// The Dt suffix names the SOURCE element size; the result elements are
// half as wide (v8i8 from v8i16, etc.).
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
// VADD : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// Half-precision forms require the full FP16 extension.
def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                     v4f16, v4f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                     v8f16, v8f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add (selected from the generic saturating-add
// ISD nodes saddsat/uaddsat rather than target intrinsics).
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", saddsat, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", uaddsat, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
// null_frag: no pattern here; selected via the explicit Pats below.
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Match VADDHN as trunc(lshr(add(Qn, Qm), half-element-width)).
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                     v4f16, v4f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                     v8f16, v8f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
// By-scalar (lane) multiply forms.
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;
def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                         v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;

// Fold mul-by-duplicated-lane of a Q register into the by-scalar forms:
// the Q-reg lane source is reduced to its containing D subregister.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq(v8f16 QPR:$src1),
                          (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                  (DSubReg_i16_reg imm:$lane))),
                          (SubReg_i16_lane imm:$lane)))>;

// Multiply by a scalar register duplicated to all lanes: insert the scalar
// into lane 0 of an undef vector and use the by-scalar instruction.
def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhd DPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhq QPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
}

// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;

// Fold vqdmulh-by-duplicated-lane of a Q register into the by-scalar forms.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                           imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                           imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
}

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

// Fold vqrdmulh-by-duplicated-lane of a Q register into the by-scalar forms.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
}

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "s", ARMvmulls, 1>;
  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "u", ARMvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  // 64-bit polynomial multiply needs the v8 Crypto extension.
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                  Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// Floating-point MLA forms are only selected when separate multiply+add
// fusion into VMLA is profitable (UseFPVMLx).
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                          v4f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                          v8f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                              v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                              v8f16, v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Fold add(acc, mul(x, duplane(Q, lane))) into by-scalar VMLA.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", ARMvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", ARMvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;

// v8.1a VQRDMLAH/VQRDMLSH: defined with null_frag and selected through the
// explicit saddsat/ssubsat-of-vqrdmulh patterns below.
// (NOTE(review): this `let` block continues past the end of this chunk.)
let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations,
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
  // (Q += D * D)
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  def : Pat<(v4i16 (saddsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (saddsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (saddsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                  (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (saddsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                  (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  def : Pat<(v4i16 (saddsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh
                            (v4i16 DPR:$Vn),
                            (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                imm:$lane)))))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (saddsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh
                            (v2i32 DPR:$Vn),
                            (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                imm:$lane)))))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (saddsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh
                            (v8i16 QPR:$src2),
                            (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                imm:$lane)))))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                            QPR:$src3,
                                            (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (saddsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh
                            (v4i32 QPR:$src2),
                            (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                imm:$lane)))))),
            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                            QPR:$src3,
                                            (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;

  // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
  // (Q -= D * D)
  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                             null_frag>;
  def : Pat<(v4i16 (ssubsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def :
Pat<(v2i32 (ssubsat 4551 (v2i32 DPR:$src1), 4552 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4553 (v2i32 DPR:$Vm))))), 4554 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4555 def : Pat<(v8i16 (ssubsat 4556 (v8i16 QPR:$src1), 4557 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4558 (v8i16 QPR:$Vm))))), 4559 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4560 def : Pat<(v4i32 (ssubsat 4561 (v4i32 QPR:$src1), 4562 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4563 (v4i32 QPR:$Vm))))), 4564 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4565 4566 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, 4567 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4568 null_frag>; 4569 def : Pat<(v4i16 (ssubsat 4570 (v4i16 DPR:$src1), 4571 (v4i16 (int_arm_neon_vqrdmulh 4572 (v4i16 DPR:$Vn), 4573 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4574 imm:$lane)))))), 4575 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; 4576 def : Pat<(v2i32 (ssubsat 4577 (v2i32 DPR:$src1), 4578 (v2i32 (int_arm_neon_vqrdmulh 4579 (v2i32 DPR:$Vn), 4580 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4581 imm:$lane)))))), 4582 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4583 imm:$lane))>; 4584 def : Pat<(v8i16 (ssubsat 4585 (v8i16 QPR:$src1), 4586 (v8i16 (int_arm_neon_vqrdmulh 4587 (v8i16 QPR:$src2), 4588 (v8i16 (ARMvduplane (v8i16 QPR:$src3), 4589 imm:$lane)))))), 4590 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), 4591 (v8i16 QPR:$src2), 4592 (v4i16 (EXTRACT_SUBREG 4593 QPR:$src3, 4594 (DSubReg_i16_reg imm:$lane))), 4595 (SubReg_i16_lane imm:$lane)))>; 4596 def : Pat<(v4i32 (ssubsat 4597 (v4i32 QPR:$src1), 4598 (v4i32 (int_arm_neon_vqrdmulh 4599 (v4i32 QPR:$src2), 4600 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 4601 imm:$lane)))))), 4602 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), 4603 (v4i32 QPR:$src2), 4604 (v2i32 (EXTRACT_SUBREG 4605 QPR:$src3, 4606 (DSubReg_i32_reg imm:$lane))), 4607 (SubReg_i32_lane imm:$lane)))>; 4608} 4609// VQDMLAL : Vector 
Saturating Doubling Multiply Accumulate Long (Q += D * D) 4610defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4611 "vqdmlal", "s", null_frag>; 4612defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4613 4614let Predicates = [HasNEON] in { 4615def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), 4616 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4617 (v4i16 DPR:$Vm))))), 4618 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4619def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), 4620 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4621 (v2i32 DPR:$Vm))))), 4622 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4623def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), 4624 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4625 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4626 imm:$lane)))))), 4627 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4628def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), 4629 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4630 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4631 imm:$lane)))))), 4632 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4633} 4634 4635// VMLS : Vector Multiply Subtract (integer and floating-point) 4636defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4637 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4638def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4639 v2f32, fmul_su, fsub_mlx>, 4640 Requires<[HasNEON, UseFPVMLx]>; 4641def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4642 v4f32, fmul_su, fsub_mlx>, 4643 Requires<[HasNEON, UseFPVMLx]>; 4644def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", 4645 v4f16, fmul, fsub>, 4646 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4647def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", 4648 v8f16, fmul, fsub>, 4649 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4650defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4651 IIC_VMACi16Q, IIC_VMACi32Q, 
"vmls", "i", sub>; 4652def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4653 v2f32, fmul_su, fsub_mlx>, 4654 Requires<[HasNEON, UseFPVMLx]>; 4655def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4656 v4f32, v2f32, fmul_su, fsub_mlx>, 4657 Requires<[HasNEON, UseFPVMLx]>; 4658def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", 4659 v4f16, fmul, fsub>, 4660 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4661def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", 4662 v8f16, v4f16, fmul, fsub>, 4663 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4664 4665let Predicates = [HasNEON] in { 4666def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4667 (mul (v8i16 QPR:$src2), 4668 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))), 4669 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4670 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4671 (DSubReg_i16_reg imm:$lane))), 4672 (SubReg_i16_lane imm:$lane)))>; 4673 4674def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4675 (mul (v4i32 QPR:$src2), 4676 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))), 4677 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4678 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4679 (DSubReg_i32_reg imm:$lane))), 4680 (SubReg_i32_lane imm:$lane)))>; 4681} 4682 4683def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4684 (fmul_su (v4f32 QPR:$src2), 4685 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))), 4686 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4687 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4688 (DSubReg_i32_reg imm:$lane))), 4689 (SubReg_i32_lane imm:$lane)))>, 4690 Requires<[HasNEON, UseFPVMLx]>; 4691 4692// VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4693defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4694 "vmlsl", "s", ARMvmulls, sub>; 4695defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4696 "vmlsl", "u", ARMvmullu, sub>; 4697 4698defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, 
sub>; 4699defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>; 4700 4701// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4702defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4703 "vqdmlsl", "s", null_frag>; 4704defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; 4705 4706let Predicates = [HasNEON] in { 4707def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), 4708 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4709 (v4i16 DPR:$Vm))))), 4710 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4711def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), 4712 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4713 (v2i32 DPR:$Vm))))), 4714 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4715def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), 4716 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4717 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4718 imm:$lane)))))), 4719 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4720def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), 4721 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4722 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4723 imm:$lane)))))), 4724 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4725} 4726 4727// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
4728def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4729 v2f32, fmul_su, fadd_mlx>, 4730 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4731 4732def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4733 v4f32, fmul_su, fadd_mlx>, 4734 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4735def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", 4736 v4f16, fmul, fadd>, 4737 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4738 4739def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", 4740 v8f16, fmul, fadd>, 4741 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4742 4743// Fused Vector Multiply Subtract (floating-point) 4744def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4745 v2f32, fmul_su, fsub_mlx>, 4746 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4747def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4748 v4f32, fmul_su, fsub_mlx>, 4749 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4750def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", 4751 v4f16, fmul, fsub>, 4752 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4753def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", 4754 v8f16, fmul, fsub>, 4755 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4756 4757// Match @llvm.fma.* intrinsics 4758def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4759 (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4760 Requires<[HasNEON,HasFullFP16]>; 4761def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4762 (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4763 Requires<[HasNEON,HasFullFP16]>; 4764def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4765 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4766 Requires<[HasNEON,HasVFP4]>; 4767def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4768 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4769 Requires<[HasNEON,HasVFP4]>; 4770def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4771 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4772 
Requires<[HasNEON,HasVFP4]>; 4773def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4774 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4775 Requires<[HasNEON,HasVFP4]>; 4776 4777// ARMv8.2a dot product instructions. 4778// We put them in the VFPV8 decoder namespace because the ARM and Thumb 4779// encodings are the same and thus no further bit twiddling is necessary 4780// in the disassembler. 4781class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm, 4782 string AsmTy, ValueType AccumTy, ValueType InputTy, 4783 SDPatternOperator OpNode> : 4784 N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst), 4785 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, 4786 Asm, AsmTy, 4787 [(set (AccumTy RegTy:$dst), 4788 (OpNode (AccumTy RegTy:$Vd), 4789 (InputTy RegTy:$Vn), 4790 (InputTy RegTy:$Vm)))]> { 4791 let Predicates = [HasDotProd]; 4792 let DecoderNamespace = "VFPV8"; 4793 let Constraints = "$dst = $Vd"; 4794} 4795 4796def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>; 4797def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>; 4798def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>; 4799def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>; 4800 4801// Indexed dot product instructions: 4802multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty, 4803 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode, 4804 dag RHS> { 4805 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst), 4806 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 4807 N3RegFrm, IIC_VDOTPROD, opc, dt, []> { 4808 bit lane; 4809 let Inst{5} = lane; 4810 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); 4811 let Constraints = "$dst = $Vd"; 4812 let Predicates = [HasDotProd]; 4813 let DecoderNamespace = "VFPV8"; 4814 } 4815 4816 def : Pat< 4817 (AccumType (OpNode (AccumType Ty:$Vd), 4818 (InputType 
Ty:$Vn), 4819 (InputType (bitconvert (AccumType 4820 (ARMvduplane (AccumType Ty:$Vm), 4821 VectorIndex32:$lane)))))), 4822 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>; 4823} 4824 4825defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8, 4826 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>; 4827defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8, 4828 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>; 4829defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8, 4830 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4831defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8, 4832 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4833 4834// v8.6A matrix multiplication extension 4835let Predicates = [HasMatMulInt8] in { 4836 class N3VMatMul<bit B, bit U, string Asm, string AsmTy, 4837 SDPatternOperator OpNode> 4838 : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst), 4839 (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary, 4840 Asm, AsmTy, 4841 [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd), 4842 (v16i8 QPR:$Vn), 4843 (v16i8 QPR:$Vm)))]> { 4844 let DecoderNamespace = "VFPV8"; 4845 let Constraints = "$dst = $Vd"; 4846 } 4847 4848 multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy, 4849 ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode, 4850 dag RHS> { 4851 4852 def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst), 4853 (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm, 4854 NoItinerary, Asm, AsmTy, []> { 4855 bit lane; 4856 let Inst{5} = lane; 4857 let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane"); 4858 let DecoderNamespace = "VFPV8"; 4859 let Constraints = "$dst = $Vd"; 4860 } 4861 4862 def : Pat< 4863 (AccumTy (OpNode (AccumTy RegTy:$Vd), 4864 (InputTy RegTy:$Vn), 4865 (InputTy (bitconvert (AccumTy 4866 (ARMvduplane (AccumTy RegTy:$Vm), 4867 VectorIndex32:$lane)))))), 4868 (!cast<Instruction>(NAME) RegTy:$Vd, 
RegTy:$Vn, RHS, VectorIndex32:$lane)>; 4869 4870 } 4871 4872 multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS> 4873 : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> { 4874 def : Pat< 4875 (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd), 4876 (InputTy (bitconvert (AccumTy 4877 (ARMvduplane (AccumTy RegTy:$Vm), 4878 VectorIndex32:$lane)))), 4879 (InputTy RegTy:$Vn))), 4880 (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>; 4881 } 4882 4883 def VSMMLA : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>; 4884 def VUMMLA : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>; 4885 def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>; 4886 def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8, int_arm_neon_usdot>; 4887 def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>; 4888 4889 defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8, 4890 int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>; 4891 defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8, 4892 int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4893 defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>; 4894 defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4895} 4896 4897// ARMv8.3 complex operations 4898class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q, 4899 InstrItinClass itin, dag oops, dag iops, 4900 string opc, string dt, list<dag> pattern> 4901 : N3VCP8<{?,?}, {op21,s}, q, op4, oops, 4902 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ 4903 bits<2> rot; 4904 let Inst{24-23} = rot; 4905} 4906 4907class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q, 4908 InstrItinClass itin, dag oops, dag iops, string opc, 4909 string dt, list<dag> pattern> 4910 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, 4911 iops, itin, opc, dt, "$Vd, 
$Vn, $Vm, $rot", "", pattern> {
  bits<1> rot;
  let Inst{24} = rot;
}

// Base class for lane-indexed complex-arithmetic instructions with a tied
// accumulator ("$src1 = $Vd") and a 32-bit lane granule: the single-bit
// lane index is encoded in Inst{5}, the 2-bit rotation in Inst{21-20}.
class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = lane;
}

// As above, but with a 64-bit lane granule. Note Inst{5} comes from Vm{4}
// rather than from the lane operand (see comment below).
class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = Vm{4};
  // This is needed because the lane operand does not have any bits in the
  // encoding (it only has one possible value), so we need to manually set it
  // to its default value.
  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
}

// Emits the four tied-accumulator complex variants of an opcode:
// f16 D/Q forms (additionally gated on HasFullFP16) and f32 D/Q forms.
// All take a complexrotateop rotation operand.
multiclass N3VCP8ComplexTied<bit op21, bit op4,
                             string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  }
}

// Same four variants but with no tied accumulator and the restricted
// complexrotateopodd rotation operand.
multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
                            string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  }
}

// These instructions index by pairs of lanes, so the
// VectorIndexes are twice
// as wide as the data types.
multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
                                 SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  // f16 forms index 32-bit lane pairs (VectorIndex32) within a D scalar.
  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  // f32 forms index 64-bit lane pairs (VectorIndex64); the index has only
  // one legal value, so it carries no encoding bits (see the Lane64 base).
  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  }
}

defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;

// Select the vcadd intrinsics onto VCADD: rot90 encodes rotation operand 0,
// rot270 encodes rotation operand 1.
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
}
let Predicates
= [HasNEON,HasV8_3a] in { 5033 def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), 5034 (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>; 5035 def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), 5036 (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>; 5037 def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), 5038 (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>; 5039 def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), 5040 (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>; 5041} 5042 5043// Vector Subtract Operations. 5044 5045// VSUB : Vector Subtract (integer and floating-point) 5046defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, 5047 "vsub", "i", sub, 0>; 5048def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", 5049 v2f32, v2f32, fsub, 0>; 5050def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", 5051 v4f32, v4f32, fsub, 0>; 5052def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16", 5053 v4f16, v4f16, fsub, 0>, 5054 Requires<[HasNEON,HasFullFP16]>; 5055def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", 5056 v8f16, v8f16, fsub, 0>, 5057 Requires<[HasNEON,HasFullFP16]>; 5058// VSUBL : Vector Subtract Long (Q = D - D) 5059defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 5060 "vsubl", "s", sub, sext, 0>; 5061defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 5062 "vsubl", "u", sub, zext, 0>; 5063// VSUBW : Vector Subtract Wide (Q = Q - D) 5064defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; 5065defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; 5066// VHSUB : Vector Halving Subtract 5067defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, 5068 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5069 "vhsub", "s", int_arm_neon_vhsubs, 0>; 5070defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, 5071 
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "s", ssubsat, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "u", usubsat, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// VSUBHN has no intrinsic pattern above (null_frag); instead match the
// equivalent generic DAG: truncate of a subtract right-shifted by half the
// element width.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Comparisons.
5096 5097// VCEQ : Vector Compare Equal 5098defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5099 IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>; 5100def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, 5101 ARMCCeq, 1>; 5102def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, 5103 ARMCCeq, 1>; 5104def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, 5105 ARMCCeq, 1>, 5106 Requires<[HasNEON, HasFullFP16]>; 5107def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, 5108 ARMCCeq, 1>, 5109 Requires<[HasNEON, HasFullFP16]>; 5110 5111let TwoOperandAliasConstraint = "$Vm = $Vd" in 5112defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", 5113 "$Vd, $Vm, #0", ARMCCeq>; 5114 5115// VCGE : Vector Compare Greater Than or Equal 5116defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5117 IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>; 5118defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5119 IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>; 5120def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, 5121 ARMCCge, 0>; 5122def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, 5123 ARMCCge, 0>; 5124def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, 5125 ARMCCge, 0>, 5126 Requires<[HasNEON, HasFullFP16]>; 5127def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, 5128 ARMCCge, 0>, 5129 Requires<[HasNEON, HasFullFP16]>; 5130 5131let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5132defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", 5133 "$Vd, $Vm, #0", ARMCCge>; 5134defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", 5135 "$Vd, $Vm, #0", ARMCCle>; 5136} 5137 5138// VCGT : Vector Compare Greater Than 5139defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, 
IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
// Unsigned "greater than" maps onto the ARM unsigned-higher condition
// (ARMCChi) rather than ARMCCgt.
defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
// FP compares: the result element type is integer (v2i32/v4i32/...) while the
// compared elements are FP -- vector compares yield an element-wise mask.
def  VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                       ARMCCgt, 0>;
def  VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                       ARMCCgt, 0>;
def  VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
                       ARMCCgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def  VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
                       ARMCCgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;

// Compare-against-zero forms ("vcgt/vclt $Vd, $Vm, #0").
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                          "$Vd, $Vm, #0", ARMCCgt>;
defm VCLTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                          "$Vd, $Vm, #0", ARMCClt>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                     IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// vaclt/vacle are assembly-only: they are vacgt/vacge with the two source
// operands swapped ($Vn and $Vm exchanged in the result instruction).
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// +fp16fml Floating Point Multiplication Variants
let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
           asm, "f16", "$Vd, $Vn, $Vm", "", []>;

class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
             asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Vd, Vs, Vs[0-15], Idx[0-1]
class VFMD<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  // The scalar index and the Vn/Vm register numbers are scattered across the
  // encoding by the explicit bit assignments below.
  bit idx;
  let Inst{3} = idx;
  let Inst{19-16} = Vn{4-1};
  let Inst{7} = Vn{0};
  let Inst{5} = Vm{0};
  let Inst{2-0} = Vm{3-1};
}

// Vq, Vd, Vd[0-7], Idx[0-3]
class VFMQ<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
}

//                                               op1   op2   op3
def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
} // HasNEON, HasFP16FML


// Two-operand forms of the vaclt/vacle aliases: $Vd doubles as the second
// source operand.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
}

// Vector Bitwise Operations.

// "NOT x" expressed as "x XOR all-ones"; reused by the VBIC/VORN/VMVN/VBSL
// patterns below.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR (immediate): OR with a splatted modified-immediate.  The '?' bits of
// the cmode field are filled in from the immediate by the Inst{...} overrides.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate): clear the splatted immediate's bits in $src.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

// These produce a constant from the immediate alone, so the register
// allocator may rematerialize them instead of spilling.
let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// i32 forms take the full VMOV-style modified immediate, so the whole cmode
// field comes from SIMM.
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
// NOTE(review): the Q form also uses the D-side itinerary (IIC_VSUBiD),
// matching the source as written.
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
}

// VBSL : Vector Bitwise Select
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

// (Vn & Vd) | (Vm & ~Vd) is bitwise select with Vd as the mask.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

let Predicates = [HasNEON] in {
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// Match |zext(a) - zext(b)| directly to the widening unsigned form.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.
5577 5578def abd_shr : 5579 PatFrag<(ops node:$in1, node:$in2, node:$shift), 5580 (ARMvshrsImm (sub (zext node:$in1), 5581 (zext node:$in2)), (i32 $shift))>; 5582 5583let Predicates = [HasNEON] in { 5584def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), 5585 (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), 5586 (zext (v2i32 DPR:$opB))), 5587 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), 5588 (VABDLuv2i64 DPR:$opA, DPR:$opB)>; 5589} 5590 5591// VABA : Vector Absolute Difference and Accumulate 5592defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5593 "vaba", "s", int_arm_neon_vabds, add>; 5594defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5595 "vaba", "u", int_arm_neon_vabdu, add>; 5596 5597// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) 5598defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, 5599 "vabal", "s", int_arm_neon_vabds, zext, add>; 5600defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, 5601 "vabal", "u", int_arm_neon_vabdu, zext, add>; 5602 5603// Vector Maximum and Minimum. 
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", umax, 1>;
// FP forms select via fmaximum/fminimum (NaN-propagating); the *NM variants
// below use fmaxnum/fminnum (IEEE number-preferring) instead.
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, fmaximum, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, fmaximum, 1>;
def  VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f16",
                      v4f16, v4f16, fmaximum, 1>,
              Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f16",
                      v8f16, v8f16, fmaximum, 1>,
              Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v2f32, v2f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v4f32, v4f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v4f16, v4f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v8f16, v8f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", umin, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, fminimum, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, fminimum, 1>;
def  VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f16",
                      v4f16, v4f16, fminimum, 1>,
              Requires<[HasNEON, HasFullFP16]>;
def  VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f16",
                      v8f16, v8f16, fminimum, 1>,
              Requires<[HasNEON, HasFullFP16]>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v2f32, v2f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v4f32, v4f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v4f16, v4f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v8f16, v8f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add (D-register only; adjacent element pairs)
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f16",
                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPMIN : Vector Pairwise Minimum (same encodings as VPMAX with op4 = 1 for
// the integer forms, and size 0b10/0b11 for the FP forms)
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate (u32 forms operate on fixed-point
// integer input; f32/f16 forms on FP input)
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;
def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f16",
                        v4f16, v4f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f16",
                        v8f16, v8f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f16",
                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f16",
                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.
// VSHL : Vector Shift (register shift amount; negative counts shift right)
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

// Map the ARMvshls/ARMvshlu ISD nodes onto the register-shift instructions.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL : Vector Shift Left (Immediate)
defm VSHLi  : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs  : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                          ARMvshrsImm>;
defm VSHRu  : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                          ARMvshruImm>;

// VSHLL : Vector Shift Left Long (widen then shift)
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  // The shift amount is implied by the fixed op21_16 field, so a dedicated
  // decoder is needed.
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// A full-element-width left shift of any extension maps to the max-shift
// VSHLL form; sign/zero/any-extend are indistinguishable after the shift.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN : Vector Shift Right and Narrow
defm VSHRN  : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                         PatFrag<(ops node:$Rn, node:$amt),
                                 (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

// Truncation discards the shifted-in bits, so the unsigned-shift form can
// also be matched to the same instruction.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                          NEONvrshrsImm>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                          NEONvrshruImm>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                         NEONvrshrnImm>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrnsImm>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnuImm>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsuImm>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnuImm>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsuImm>;

// VSRA : Vector Shift Right and Accumulate
6010defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>; 6011defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>; 6012// VRSRA : Vector Rounding Shift Right and Accumulate 6013defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>; 6014defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>; 6015 6016// VSLI : Vector Shift Left and Insert 6017defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; 6018 6019// VSRI : Vector Shift Right and Insert 6020defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; 6021 6022// Vector Absolute and Saturating Absolute. 6023 6024// VABS : Vector Absolute Value 6025defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 6026 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>; 6027def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 6028 "vabs", "f32", 6029 v2f32, v2f32, fabs>; 6030def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 6031 "vabs", "f32", 6032 v4f32, v4f32, fabs>; 6033def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 6034 "vabs", "f16", 6035 v4f16, v4f16, fabs>, 6036 Requires<[HasNEON, HasFullFP16]>; 6037def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 6038 "vabs", "f16", 6039 v8f16, v8f16, fabs>, 6040 Requires<[HasNEON, HasFullFP16]>; 6041 6042// VQABS : Vector Saturating Absolute Value 6043defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 6044 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", 6045 int_arm_neon_vqabs>; 6046 6047// Vector Negate. 
// Integer negation written as (0 - x): the all-zeros vector is bitcast to
// the element type of the operand so the same fragment serves every
// integer element size (d- and q-register widths respectively).
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// Shared encodings for the integer VNEG forms; `size` fills Inst bits that
// select the element width, and the Q form sets the extra encoding bit.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG     : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG     : Vector Negate (floating-point)
// The FP forms select the `fneg` node directly rather than the (0 - x)
// PatFrags used for the integer forms.
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
// Half-precision variants are only available with the full FP16 extension.
def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;
def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;

// Map the remaining integer element types onto the VNEG instructions above.
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
}

// VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

//   Vector Bit Counting Operations.

// VCLS     : Vector Count Leading Sign Bits
defm VCLS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                          IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                          int_arm_neon_vcls>;
// VCLZ     : Vector Count Leading Zeros
// Selected from the generic `ctlz` node rather than a NEON intrinsic.
defm VCLZ    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                          IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                          ctlz>;
// VCNT     : Vector Count One Bits
// Only byte-element forms exist; selected from the generic `ctpop` node.
def  VCNTd   : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                       IIC_VCNTiD, "vcnt", "8",
                       v8i8, v8i8, ctpop>;
def  VCNTq   : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                       IIC_VCNTiQ, "vcnt", "8",
                       v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are inputs and outputs ($in1 = $Vd, $in2 = $Vm ties);
// there is no DAG pattern, so these are never produced by ISel directly.
def  VSWPd   : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                    (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                    NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                    []>;
def  VSWPq   : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                    (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                    NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                    []>;

//   Vector Move Operations.
6134 6135// VMOV : Vector Move (Register) 6136def : NEONInstAlias<"vmov${p} $Vd, $Vm", 6137 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 6138def : NEONInstAlias<"vmov${p} $Vd, $Vm", 6139 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 6140 6141// VMOV : Vector Move (Immediate) 6142 6143// Although VMOVs are not strictly speaking cheap, they are as expensive 6144// as their copies counterpart (VORR), so we should prefer rematerialization 6145// over splitting when it applies. 6146let isReMaterializable = 1, isAsCheapAsAMove=1 in { 6147def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), 6148 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 6149 "vmov", "i8", "$Vd, $SIMM", "", 6150 [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>; 6151def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), 6152 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 6153 "vmov", "i8", "$Vd, $SIMM", "", 6154 [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>; 6155 6156def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), 6157 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 6158 "vmov", "i16", "$Vd, $SIMM", "", 6159 [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> { 6160 let Inst{9} = SIMM{9}; 6161} 6162 6163def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), 6164 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 6165 "vmov", "i16", "$Vd, $SIMM", "", 6166 [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> { 6167 let Inst{9} = SIMM{9}; 6168} 6169 6170def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), 6171 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 6172 "vmov", "i32", "$Vd, $SIMM", "", 6173 [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> { 6174 let Inst{11-8} = SIMM{11-8}; 6175} 6176 6177def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), 6178 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 6179 "vmov", "i32", "$Vd, $SIMM", "", 6180 [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> { 6181 let Inst{11-8} = SIMM{11-8}; 6182} 6183 6184def 
VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), 6185 (ins nImmSplatI64:$SIMM), IIC_VMOVImm, 6186 "vmov", "i64", "$Vd, $SIMM", "", 6187 [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>; 6188def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), 6189 (ins nImmSplatI64:$SIMM), IIC_VMOVImm, 6190 "vmov", "i64", "$Vd, $SIMM", "", 6191 [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>; 6192 6193def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), 6194 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, 6195 "vmov", "f32", "$Vd, $SIMM", "", 6196 [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>; 6197def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), 6198 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, 6199 "vmov", "f32", "$Vd, $SIMM", "", 6200 [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>; 6201} // isReMaterializable, isAsCheapAsAMove 6202 6203// Add support for bytes replication feature, so it could be GAS compatible. 6204multiclass NEONImmReplicateI8InstAlias<ValueType To> { 6205 // E.g. instructions below: 6206 // "vmov.i32 d0, #0xffffffff" 6207 // "vmov.i32 d0, #0xabababab" 6208 // "vmov.i16 d0, #0xabab" 6209 // are incorrect, but we could deal with such cases. 6210 // For last two instructions, for example, it should emit: 6211 // "vmov.i8 d0, #0xab" 6212 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6213 (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>; 6214 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6215 (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>; 6216 // Also add same support for VMVN instructions. 
So instruction: 6217 // "vmvn.i32 d0, #0xabababab" 6218 // actually means: 6219 // "vmov.i8 d0, #0x54" 6220 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6221 (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>; 6222 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6223 (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>; 6224} 6225 6226defm : NEONImmReplicateI8InstAlias<i16>; 6227defm : NEONImmReplicateI8InstAlias<i32>; 6228defm : NEONImmReplicateI8InstAlias<i64>; 6229 6230// Similar to above for types other than i8, e.g.: 6231// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00" 6232// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000" 6233// In this case we do not canonicalize VMVN to VMOV 6234multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16, 6235 NeonI NV8, NeonI NV16, ValueType To> { 6236 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6237 (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6238 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6239 (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6240 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6241 (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6242 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6243 (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6244} 6245 6246defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16, 6247 VMVNv4i16, VMVNv8i16, i32>; 6248defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16, 6249 VMVNv4i16, VMVNv8i16, i64>; 6250defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32, 6251 VMVNv2i32, VMVNv4i32, i64>; 6252// TODO: add "VMOV <-> VMVN" conversion for cases like 6253// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55" 6254// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00" 6255 6256// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" 6257// require zero cycles to execute so they 
should be used wherever possible for 6258// setting a register to zero. 6259 6260// Even without these pseudo-insts we would probably end up with the correct 6261// instruction, but we could not mark the general ones with "isAsCheapAsAMove" 6262// since they are sometimes rather expensive (in general). 6263 6264let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { 6265 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, 6266 [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))], 6267 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, 6268 Requires<[HasZCZ]>; 6269 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, 6270 [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))], 6271 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, 6272 Requires<[HasZCZ]>; 6273} 6274 6275// VMOV : Vector Get Lane (move scalar to ARM core register) 6276 6277def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, 6278 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), 6279 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane", 6280 [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V), 6281 imm:$lane))]> { 6282 let Inst{21} = lane{2}; 6283 let Inst{6-5} = lane{1-0}; 6284} 6285def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, 6286 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), 6287 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane", 6288 [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V), 6289 imm:$lane))]> { 6290 let Inst{21} = lane{1}; 6291 let Inst{6} = lane{0}; 6292} 6293def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, 6294 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), 6295 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane", 6296 [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V), 6297 imm:$lane))]> { 6298 let Inst{21} = lane{2}; 6299 let Inst{6-5} = lane{1-0}; 6300} 6301def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, 6302 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), 6303 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane", 6304 [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V), 6305 imm:$lane))]> { 6306 
let Inst{21} = lane{1}; 6307 let Inst{6} = lane{0}; 6308} 6309def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, 6310 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), 6311 IIC_VMOVSI, "vmov", "32", "$R, $V$lane", 6312 [(set GPR:$R, (extractelt (v2i32 DPR:$V), 6313 imm:$lane))]>, 6314 Requires<[HasFPRegs, HasFastVGETLNi32]> { 6315 let Inst{21} = lane{0}; 6316} 6317let Predicates = [HasNEON] in { 6318// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td 6319def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane), 6320 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src, 6321 (DSubReg_i8_reg imm:$lane))), 6322 (SubReg_i8_lane imm:$lane))>; 6323def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane), 6324 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, 6325 (DSubReg_i16_reg imm:$lane))), 6326 (SubReg_i16_lane imm:$lane))>; 6327def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane), 6328 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, 6329 (DSubReg_i8_reg imm:$lane))), 6330 (SubReg_i8_lane imm:$lane))>; 6331def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane), 6332 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, 6333 (DSubReg_i16_reg imm:$lane))), 6334 (SubReg_i16_lane imm:$lane))>; 6335} 6336def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), 6337 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, 6338 (DSubReg_i32_reg imm:$lane))), 6339 (SubReg_i32_lane imm:$lane))>, 6340 Requires<[HasNEON, HasFastVGETLNi32]>; 6341def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), 6342 (COPY_TO_REGCLASS 6343 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, 6344 Requires<[HasNEON, HasSlowVGETLNi32]>; 6345def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), 6346 (COPY_TO_REGCLASS 6347 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, 6348 Requires<[HasNEON, HasSlowVGETLNi32]>; 6349let Predicates = [HasNEON] in { 6350def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), 6351 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), 6352 (SSubReg_f32_reg 
imm:$src2))>; 6353def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), 6354 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), 6355 (SSubReg_f32_reg imm:$src2))>; 6356//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), 6357// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; 6358def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), 6359 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; 6360} 6361 6362multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> { 6363 def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane), 6364 (EXTRACT_SUBREG 6365 (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)), 6366 (SSubReg_f16_reg imm_even:$lane))>; 6367 def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane), 6368 (EXTRACT_SUBREG 6369 (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)), 6370 (SSubReg_f16_reg imm_even:$lane))>; 6371} 6372 6373multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> { 6374 def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane), 6375 (COPY_TO_REGCLASS 6376 (VMOVH (EXTRACT_SUBREG 6377 (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)), 6378 (SSubReg_f16_reg imm_odd:$lane))), 6379 HPR)>; 6380 def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane), 6381 (COPY_TO_REGCLASS 6382 (VMOVH (EXTRACT_SUBREG 6383 (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)), 6384 (SSubReg_f16_reg imm_odd:$lane))), 6385 HPR)>; 6386} 6387 6388let Predicates = [HasNEON] in { 6389 defm : ExtractEltEvenF16<v4f16, v8f16>; 6390 defm : ExtractEltOddF16VMOVH<v4f16, v8f16>; 6391} 6392 6393let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in { 6394 // If VMOVH (vmovx.f16) is available use it to extract BF16 from the odd lanes 6395 defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>; 6396} 6397 6398let Predicates = [HasBF16, HasNEON] in { 6399 defm : ExtractEltEvenF16<v4bf16, v8bf16>; 6400 6401 // Otherwise, if VMOVH is not available resort to extracting the odd lane 6402 // into a GPR and then moving to HPR 6403 def : 
Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane), 6404 (COPY_TO_REGCLASS 6405 (VGETLNu16 (v4bf16 DPR:$src), imm:$lane), 6406 HPR)>; 6407 6408 def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane), 6409 (COPY_TO_REGCLASS 6410 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, 6411 (DSubReg_i16_reg imm:$lane))), 6412 (SubReg_i16_lane imm:$lane)), 6413 HPR)>; 6414} 6415 6416// VMOV : Vector Set Lane (move ARM core register to scalar) 6417 6418let Constraints = "$src1 = $V" in { 6419def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), 6420 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane), 6421 IIC_VMOVISL, "vmov", "8", "$V$lane, $R", 6422 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), 6423 GPR:$R, imm:$lane))]> { 6424 let Inst{21} = lane{2}; 6425 let Inst{6-5} = lane{1-0}; 6426} 6427def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), 6428 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane), 6429 IIC_VMOVISL, "vmov", "16", "$V$lane, $R", 6430 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), 6431 GPR:$R, imm:$lane))]> { 6432 let Inst{21} = lane{1}; 6433 let Inst{6} = lane{0}; 6434} 6435def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), 6436 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), 6437 IIC_VMOVISL, "vmov", "32", "$V$lane, $R", 6438 [(set DPR:$V, (insertelt (v2i32 DPR:$src1), 6439 GPR:$R, imm:$lane))]>, 6440 Requires<[HasVFP2]> { 6441 let Inst{21} = lane{0}; 6442 // This instruction is equivalent as 6443 // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) 6444 let isInsertSubreg = 1; 6445} 6446} 6447 6448// TODO: for odd lanes we could optimize this a bit by using the VINS 6449// FullFP16 instruction when it is available 6450multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> { 6451 def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane), 6452 (VT4 (VSETLNi16 DPR:$src1, 6453 (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>; 6454 def : Pat<(insertelt (VT8 QPR:$src1), 
(VTScalar HPR:$src2), imm:$lane), 6455 (VT8 (INSERT_SUBREG QPR:$src1, 6456 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, 6457 (DSubReg_i16_reg imm:$lane))), 6458 (COPY_TO_REGCLASS HPR:$src2, GPR), 6459 (SubReg_i16_lane imm:$lane))), 6460 (DSubReg_i16_reg imm:$lane)))>; 6461} 6462 6463let Predicates = [HasNEON] in { 6464def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), 6465 (v16i8 (INSERT_SUBREG QPR:$src1, 6466 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, 6467 (DSubReg_i8_reg imm:$lane))), 6468 GPR:$src2, (SubReg_i8_lane imm:$lane))), 6469 (DSubReg_i8_reg imm:$lane)))>; 6470def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), 6471 (v8i16 (INSERT_SUBREG QPR:$src1, 6472 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, 6473 (DSubReg_i16_reg imm:$lane))), 6474 GPR:$src2, (SubReg_i16_lane imm:$lane))), 6475 (DSubReg_i16_reg imm:$lane)))>; 6476def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), 6477 (v4i32 (INSERT_SUBREG QPR:$src1, 6478 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, 6479 (DSubReg_i32_reg imm:$lane))), 6480 GPR:$src2, (SubReg_i32_lane imm:$lane))), 6481 (DSubReg_i32_reg imm:$lane)))>; 6482 6483def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), 6484 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), 6485 SPR:$src2, (SSubReg_f32_reg imm:$src3))>; 6486def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)), 6487 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)), 6488 SPR:$src2, (SSubReg_f32_reg imm:$src3))>; 6489 6490defm : InsertEltF16<f16, v4f16, v8f16>; 6491 6492//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), 6493// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; 6494def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), 6495 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; 6496 6497def : Pat<(v2f32 (scalar_to_vector SPR:$src)), 6498 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; 
// Build a vector whose lane 0 holds the FP scalar; the remaining lanes are
// left undefined (IMPLICIT_DEF) and the scalar is placed via subregister
// insertion.
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

// GPR scalars go into lane 0 of an undefined d-register with VSETLN.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// Q-register versions: build the low d-half as above, then insert it into
// dsub_0 of an undefined q-register.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
}

// BF16 lane insertion reuses the f16 multiclass defined above.
let Predicates = [HasNEON, HasBF16] in
defm : InsertEltF16<bf16, v4bf16, v8bf16>;

// VDUP     : Vector Duplicate (from ARM core register to all elements)

// Broadcast a GPR value to every element of a d- or q-register.
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

// The 32-bit d-register form is gated on HasFastVDUP32; slow-VDUP.32
// targets use the VMOVDRR patterns below instead.
def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// ARMvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
      Requires<[HasNEON]>;

// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// VMOVDRR with the same GPR twice fills both 32-bit halves of the d-reg.
def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP     : Vector Duplicate Lane (from scalar to all elements)

// Broadcast one lane of a d-register source to all elements of the result.
// The Q form still reads a d-register source (OpTy) and widens to ResTy.
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
6575 6576def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { 6577 bits<3> lane; 6578 let Inst{19-17} = lane{2-0}; 6579} 6580def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { 6581 bits<2> lane; 6582 let Inst{19-18} = lane{1-0}; 6583} 6584def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { 6585 bits<1> lane; 6586 let Inst{19} = lane{0}; 6587} 6588def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { 6589 bits<3> lane; 6590 let Inst{19-17} = lane{2-0}; 6591} 6592def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { 6593 bits<2> lane; 6594 let Inst{19-18} = lane{1-0}; 6595} 6596def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { 6597 bits<1> lane; 6598 let Inst{19} = lane{0}; 6599} 6600 6601let Predicates = [HasNEON] in { 6602def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)), 6603 (VDUPLN32d DPR:$Vm, imm:$lane)>; 6604 6605def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)), 6606 (VDUPLN32d DPR:$Vm, imm:$lane)>; 6607 6608def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)), 6609 (VDUPLN32q DPR:$Vm, imm:$lane)>; 6610 6611def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)), 6612 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, 6613 (DSubReg_i8_reg imm:$lane))), 6614 (SubReg_i8_lane imm:$lane)))>; 6615def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)), 6616 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, 6617 (DSubReg_i16_reg imm:$lane))), 6618 (SubReg_i16_lane imm:$lane)))>; 6619def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)), 6620 (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src, 6621 (DSubReg_i16_reg imm:$lane))), 6622 (SubReg_i16_lane imm:$lane)))>; 6623def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)), 6624 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, 6625 (DSubReg_i32_reg imm:$lane))), 6626 (SubReg_i32_lane imm:$lane)))>; 6627def : Pat<(v4f32 (ARMvduplane (v4f32 
QPR:$src), imm:$lane)), 6628 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, 6629 (DSubReg_i32_reg imm:$lane))), 6630 (SubReg_i32_lane imm:$lane)))>; 6631 6632def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))), 6633 (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), 6634 (f16 HPR:$src), ssub_0), (i32 0)))>; 6635def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))), 6636 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6637 SPR:$src, ssub_0), (i32 0)))>; 6638def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))), 6639 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6640 SPR:$src, ssub_0), (i32 0)))>; 6641def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))), 6642 (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), 6643 (f16 HPR:$src), ssub_0), (i32 0)))>; 6644} 6645 6646let Predicates = [HasNEON, HasBF16] in { 6647def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)), 6648 (VDUPLN16d DPR:$Vm, imm:$lane)>; 6649 6650def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)), 6651 (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src, 6652 (DSubReg_i16_reg imm:$lane))), 6653 (SubReg_i16_lane imm:$lane)))>; 6654 6655def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))), 6656 (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), 6657 (bf16 HPR:$src), ssub_0), (i32 0)))>; 6658def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))), 6659 (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), 6660 (bf16 HPR:$src), ssub_0), (i32 0)))>; 6661} 6662 6663// VMOVN : Vector Narrowing Move 6664defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, 6665 "vmovn", "i", trunc>; 6666// VQMOVN : Vector Saturating Narrowing Move 6667defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, 6668 "vqmovn", "s", int_arm_neon_vqmovns>; 6669defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, 6670 "vqmovn", "u", int_arm_neon_vqmovnu>; 6671defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, 6672 "vqmovun", "s", int_arm_neon_vqmovnsu>; 6673// VMOVL : Vector 
// Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
}

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                     v4i16, v4f16, fp_to_sint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                     v4i16, v4f16, fp_to_uint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                     v4f16, v4i16, sint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                     v4f16, v4i16, uint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;

def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                     v8i16, v8f16, fp_to_sint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                     v8i16, v8f16, fp_to_uint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                     v8f16, v8i16, sint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                     v8f16, v8i16, uint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;

// VCVT{A, N, P, M}
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s16.f16", v4i16, v4f16, IntS>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s16.f16", v8i16, v8f16, IntS>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u16.f16", v4i16, v4f16, IntU>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u16.f16", v8i16, v8f16, IntU>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
}

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;

let Predicates = [HasNEON] in {
  def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
            (VREV64q32 QPR:$Vm)>;
  def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
            (VREV64q16 QPR:$Vm)>;
  def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
            (VREV64d16 DPR:$Vm)>;
}

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

let Predicates = [HasNEON] in {
  def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
            (VREV32q16 QPR:$Vm)>;
  def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
            (VREV32d16 DPR:$Vm)>;
}

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
    Requires<[HasNEON]>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;

def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16

// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

// VTRN : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                     v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                     v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                           v8i8:$Vn1, dsub_1,
                                           v8i8:$Vn2, dsub_2,
                                           (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBX3Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                           v8i8:$Vn1, dsub_1,
                                           v8i8:$Vn2, dsub_2,
                                           (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                           v8i8:$Vn1, dsub_1,
                                           v8i8:$Vn2, dsub_2,
                                           v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                           v8i8:$Vn1, dsub_1,
                                           v8i8:$Vn2, dsub_2,
                                           v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
}

// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f16",
                       v4f16, v4f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f16",
                       v8f16, v8f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;

def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

let Predicates = [HasNEON] in {
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;
}

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
               (v4f16 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                (v2f32 (Inst
                  (INSERT_SUBREG
                    (v2f32 (IMPLICIT_DEF)),
                    (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                ssub_0))>;
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                            SPR:$a, ssub_0))),
                ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;
def : Pat<(arm_vmovsr GPR:$a),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns or Endianess - Revert Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// 64 bit conversions
let Predicates = [HasNEON] in {
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;

def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;

def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16 DPR:$src)>;

// 128 bit conversions
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;

def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16 QPR:$src)>;
}

let Predicates = [IsLE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;

  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (v4bf16 DPR:$src)>;

  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;

  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4bf16 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;

  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;

  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;

  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;

  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;

  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;

  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
}

let Predicates = [IsBE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2i32
DPR:$src))), (VREV32d16 DPR:$src)>; 7575 def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 7576 7577 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; 7578 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; 7579 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; 7580 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; 7581 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 7582 7583 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; 7584 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; 7585 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; 7586 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; 7587 def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>; 7588 def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (VREV16d8 DPR:$src)>; 7589 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; 7590 7591 // 128 bit conversions 7592 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 7593 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 7594 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; 7595 def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>; 7596 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 7597 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 7598 7599 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 7600 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 7601 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; 7602 def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>; 7603 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 7604 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 7605 7606 
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 7607 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 7608 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; 7609 def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>; 7610 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 7611 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 7612 7613 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 7614 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 7615 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; 7616 def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>; 7617 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 7618 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 7619 7620 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7621 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7622 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7623 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; 7624 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7625 7626 def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7627 def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7628 def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7629 def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; 7630 def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7631 7632 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7633 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7634 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7635 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), 
(VREV32q16 QPR:$src)>; 7636 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7637 7638 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; 7639 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; 7640 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; 7641 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; 7642 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>; 7643 def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8 QPR:$src)>; 7644 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; 7645} 7646 7647let Predicates = [HasNEON] in { 7648 // Here we match the specific SDNode type 'ARMVectorRegCastImpl' 7649 // rather than the more general 'ARMVectorRegCast' which would also 7650 // match some bitconverts. If we use the latter in cases where the 7651 // input and output types are the same, the bitconvert gets elided 7652 // and we end up generating a nonsense match of nothing. 
7653 7654 foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7655 foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7656 def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>; 7657 7658 foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in 7659 foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in 7660 def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>; 7661} 7662 7663// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian 7664let Predicates = [IsBE,HasNEON] in { 7665def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 7666 (VREV64q8 (VLD1q8 addrmode6:$addr))>; 7667def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 7668 (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>; 7669def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 7670 (VREV64q16 (VLD1q16 addrmode6:$addr))>; 7671def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 7672 (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>; 7673} 7674 7675// Fold extracting an element out of a v2i32 into a vfp register. 7676def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), 7677 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>, 7678 Requires<[HasNEON]>; 7679 7680// Vector lengthening move with load, matching extending loads. 7681 7682// extload, zextload and sextload for a standard lengthening load. 
// Example:
//   Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
//
// Emits _Any/_Z/_S patterns covering anyext, zext and sext loads of a full
// D register, widened into a Q register with the matching VMOVL.
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  // anyext load: the unsigned widen is as good as any.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
             Requires<[HasNEON]>;

  // zext load -> unsigned VMOVL.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
           Requires<[HasNEON]>;

  // sext load -> signed VMOVL.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
           Requires<[HasNEON]>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example:
//   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
//
// Loads 32 bits with VLD1LN, widens with a full-width VMOVL, then keeps only
// the low D half of the result via EXTRACT_SUBREG.
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                   (VLD1LNd32 addrmode6oneL32:$addr,
                              (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
                   (VLD1LNd32 addrmode6oneL32:$addr,
                              (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string InsnLanes,
                                          string InsnTy, string RevLanes> {
  // Same shape as Lengthen_HalfSingle, with a VREV32d<RevLanes> inserted
  // after the VLD1LN to put the loaded lanes into vector order on BE.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                     (!cast<Instruction>("VREV32d" # RevLanes)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                   (!cast<Instruction>("VREV32d" # RevLanes)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
                   (!cast<Instruction>("VREV32d" # RevLanes)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0))>;
//
// Two chained VMOVLs: widen once, drop to the low D half, widen again.
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty,
                           string Insn2Lanes, string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                      string SrcTy, string Insn1Lanes,
                                      string Insn1Ty, string Insn2Lanes,
                                      string Insn2Ty, string RevLanes> {
  // Same shape as Lengthen_Double, with a VREV32d<RevLanes> inserted after
  // the VLD1LN to put the loaded lanes into vector order on BE.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (!cast<Instruction>("VREV32d" # RevLanes)
                         (VLD1LNd32 addrmode6oneL32:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (!cast<Instruction>("VREV32d" # RevLanes)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                     (!cast<Instruction>("VREV32d" # RevLanes)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//                         dsub_0)>;
//
// Like Lengthen_Double, but a final EXTRACT_SUBREG keeps only the low D half
// of the second widen, yielding a 64-bit result.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                               string Insn1Lanes, string Insn1Ty,
                               string Insn2Lanes, string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                     (EXTRACT_SUBREG
                       (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                         (VLD1LNd16 addrmode6:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (VLD1LNd16 addrmode6:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                       (VLD1LNd16 addrmode6:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
// Big-endian variant of Lengthen_HalfDouble: a VREV16d8 is inserted after the
// VLD1LN to put the loaded lanes into vector order. The reversal instruction
// is a fixed instruction here (no name concatenation is needed), so it is
// referenced directly rather than through !cast<Instruction>.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string Insn1Lanes,
                                          string Insn1Ty, string Insn2Lanes,
                                          string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                     (EXTRACT_SUBREG
                       (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                         (VREV16d8
                           (VLD1LNd16 addrmode6:$addr,
                                      (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (VREV16d8
                         (VLD1LNd16 addrmode6:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                       (VREV16d8
                         (VLD1LNd16 addrmode6:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

defm : Lengthen_Single<"8", "i16", "8">;  // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [HasNEON,IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">;  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [HasNEON,IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">;   // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d8 instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
// Big-endian triple lengthening: VREV16d8 is a fixed instruction here, so it
// is referenced directly instead of through a needless !cast<Instruction> on
// a constant string (identical records, simpler and checked at parse time).
let Predicates = [HasNEON,IsBE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VREV16d8
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VREV16d8
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (VREV16d8
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}

// Concatenating two D registers into one Q register is just a register
// sequence; no instruction is needed.
let Predicates = [HasNEON] in {
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ...
// ... immediates: vand with a splat immediate is encoded as VBIC of the
// bitwise-NOT of that immediate.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                     (ins VecListOneDByteIndexed:$list,
                          addrmode6alignNone:$addr, pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                      (ins VecListOneDHWordIndexed:$list,
                           addrmode6align16:$addr, pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                      (ins VecListOneDWordIndexed:$list,
                           addrmode6align32:$addr, pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
      (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
      (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
      (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
      (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
      (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
      (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                     (ins VecListOneDByteIndexed:$list,
                          addrmode6alignNone:$addr, pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                      (ins VecListOneDHWordIndexed:$list,
                           addrmode6align16:$addr, pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                      (ins VecListOneDWordIndexed:$list,
                           addrmode6align32:$addr, pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
      (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
      (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
      (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST1LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
      (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
      (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
           rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
      (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                     (ins VecListTwoDByteIndexed:$list,
                          addrmode6align16:$addr, pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                      (ins VecListTwoDHWordIndexed:$list,
                           addrmode6align32:$addr, pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                      (ins VecListTwoDWordIndexed:$list,
                           addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                      (ins VecListTwoQHWordIndexed:$list,
                           addrmode6align32:$addr, pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                      (ins VecListTwoQWordIndexed:$list,
                           addrmode6align64:$addr, pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdAsm_16
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdAsm_32
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqAsm_16
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqAsm_32
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VST2LNdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback with register increment ($Rm).
def VST2LNdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
      (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
           rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
      (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
      (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes (duplicating) pseudo-instructions. The lane handling in
// $list is not expressible with an InstAlias, so the asm parser matches
// these pseudos instead.
def VLD3DUPdAsm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPdAsm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPdAsm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPqAsm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPqAsm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPqAsm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VLD3DUPdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           pred:$p)>;
// Writeback with register increment ($Rm).
def VLD3DUPdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
           rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. The lane index inside $list is not
// expressible with an InstAlias, so the asm parser matches these pseudos
// instead.
def VLD3LNdAsm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNdAsm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNdAsm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNqAsm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNqAsm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VLD3LNdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VLD3LNqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
// Writeback with register increment ($Rm).
def VLD3LNdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;

// VLD3 multiple-structure pseudo-instructions. The vector-list operands are
// not yet modeled by the real instructions.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VLD3dWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback with register increment ($Rm).
def VLD3dWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. The lane index inside $list is not
// expressible with an InstAlias, so the asm parser matches these pseudos
// instead.
def VST3LNdAsm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNdAsm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNdAsm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNqAsm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNqAsm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VST3LNdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
def VST3LNqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           pred:$p)>;
// Writeback with register increment ($Rm).
def VST3LNdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
           rGPR:$Rm, pred:$p)>;


// VST3 multiple-structure pseudo-instructions. The vector-list operands are
// not yet modeled by the real instructions.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VST3dWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
      (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback with register increment ($Rm).
def VST3dWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeD:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
      (ins VecListThreeQ:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes (duplicating) pseudo-instructions. The lane handling in
// $list is not expressible with an InstAlias, so the asm parser matches
// these pseudos instead.
def VLD4DUPdAsm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
           pred:$p)>;
def VLD4DUPdAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
           pred:$p)>;
def VLD4DUPdAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;
def VLD4DUPqAsm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
           pred:$p)>;
def VLD4DUPqAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
           pred:$p)>;
def VLD4DUPqAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VLD4DUPdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
           pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
           pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
           pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
           pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
           pred:$p)>;
// Writeback with register increment ($Rm).
def VLD4DUPdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
           rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. The lane index inside $list is not
// expressible with an InstAlias, so the asm parser matches these pseudos
// instead.
def VLD4LNdAsm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           pred:$p)>;
def VLD4LNdAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VLD4LNdAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
def VLD4LNqAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VLD4LNqAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VLD4LNdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           pred:$p)>;
def VLD4LNdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VLD4LNdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
def VLD4LNqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VLD4LNqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
// Writeback with register increment ($Rm).
def VLD4LNdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;



// VLD4 multiple-structure pseudo-instructions. The vector-list operands are
// not yet modeled by the real instructions.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qAsm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qAsm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qAsm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VLD4dWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4dWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
def VLD4qWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           pred:$p)>;
// Writeback with register increment ($Rm).
def VLD4dWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
           rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. The lane index inside $list is not
// expressible with an InstAlias, so the asm parser matches these pseudos
// instead.
def VST4LNdAsm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           pred:$p)>;
def VST4LNdAsm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNdAsm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
def VST4LNqAsm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNqAsm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;

// Writeback with fixed increment ("$addr!").
def VST4LNdWB_fixed_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           pred:$p)>;
def VST4LNdWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNdWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
def VST4LNqWB_fixed_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           pred:$p)>;
def VST4LNqWB_fixed_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           pred:$p)>;
// Writeback with register increment ($Rm).
def VST4LNdWB_register_Asm_8
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
      (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
      (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
           rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32
  : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
      (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
           rGPR:$Rm, pred:$p)>;


// VST4 multiple-structure pseudo-instructions. The vector-list operands are
// not yet modeled by the real instructions.
// FIXME: Remove these when the register classes and instructions are updated.
// Base (no-writeback) forms. All VST4 multiple-structure variants accept
// 64/128/256-bit alignment on $addr regardless of the element size.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Fixed-increment writeback variants ("$addr!").
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Register-increment writeback variants ("$addr, $Rm").
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix.
// Register-to-register VMOV is encoded as VORR with identical source operands.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// vcle Dd, Dn, Dm  ==>  vcge Dd, Dm, Dn  (note $Dm/$Dn swapped in the result).
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// vclt Dd, Dn, Dm  ==>  vcgt Dd, Dm, Dn  (note $Dm/$Dn swapped in the result).
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
// The nImmVMOVI32Neg operand accepts the negated immediate, so vmov with an
// unencodable immediate becomes vmvn (and vice versa).
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
let Predicates = [HasBF16, HasNEON] in {
// Common base for the BF16 dot-product encodings; everything decodes in the
// VFPV8 namespace.
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
   : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
           N3RegFrm, IIC_VDOTPROD, "", "", pattern>
{
  let DecoderNamespace = "VFPV8";
}

// Vector form of VDOT: accumulates into $Vd (tied to $dst) via
// int_arm_neon_bfdot.
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
   : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
              [(set (AccumTy RegTy:$dst),
                    (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                        (InputTy RegTy:$Vn),
                                        (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

// Indexed (by-lane) form of VDOT. The instruction itself carries no pattern;
// the accompanying Pat matches bfdot with a duplicated lane of $Vm, with RHS
// supplying the D-register operand (the Q variant extracts dsub_0).
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                     ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                         DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                 (InputTy RegTy:$Vn),
                                 (InputTy (bitconvert (AccumTy
                                    (ARMvduplane (AccumTy RegTy:$Vm),
                                      VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v8i8>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v16i8>;

defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v8i8, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// BF16 matrix multiply-accumulate (VMMLA), selected from
// int_arm_neon_bfmmla; only instantiated for Q registers below.
class BF16MM<bit Q, RegisterClass RegTy,
             string opc>
   : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
           (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
           N3RegFrm, IIC_VDOTPROD, "", "",
           [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                                        (v16i8 QPR:$Vn),
                                                        (v16i8 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;

// BF16 widening multiply-accumulate to top/bottom ("vfmat"/"vfmab"),
// selected from int_arm_neon_bfmlalt / int_arm_neon_bfmlalb.
class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
               [(set (v4f32 QPR:$dst),
                     (OpNode (v4f32 QPR:$Vd),
                             (v16i8 QPR:$Vn),
                             (v16i8 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

// Indexed (by-lane) form of the above. The 2-bit lane index is split across
// Inst{5} and Inst{3}; the Pat extracts the matching D subregister and lane.
multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
            (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
            IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
    bits<2> idx;
    let Inst{5} = idx{1};
    let Inst{3} = idx{0};
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v16i8 QPR:$Vn),
                   (v16i8 (bitconvert (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                                               VectorIndex16:$lane)))))),
    (!cast<Instruction>(NAME) QPR:$Vd,
                              QPR:$Vn,
                              (EXTRACT_SUBREG QPR:$Vm,
                                 (DSubReg_i16_reg VectorIndex16:$lane)),
                              (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

// VCVT between f32 (Q source) and bf16 (D destination); no selection
// pattern here.
def BF16_VCVT   : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                      (outs DPR:$Vd), (ins QPR:$Vm),
                      NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
}
// End of BFloat16 instructions