1//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file describes the ARM NEON instruction set. 10// 11//===----------------------------------------------------------------------===// 12 13 14//===----------------------------------------------------------------------===// 15// NEON-specific Operands. 16//===----------------------------------------------------------------------===// 17def nModImm : Operand<i32> { 18 let PrintMethod = "printVMOVModImmOperand"; 19} 20 21def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } 22def nImmSplatI8 : Operand<i32> { 23 let PrintMethod = "printVMOVModImmOperand"; 24 let ParserMatchClass = nImmSplatI8AsmOperand; 25} 26def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } 27def nImmSplatI16 : Operand<i32> { 28 let PrintMethod = "printVMOVModImmOperand"; 29 let ParserMatchClass = nImmSplatI16AsmOperand; 30} 31def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } 32def nImmSplatI32 : Operand<i32> { 33 let PrintMethod = "printVMOVModImmOperand"; 34 let ParserMatchClass = nImmSplatI32AsmOperand; 35} 36def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } 37def nImmSplatNotI16 : Operand<i32> { 38 let ParserMatchClass = nImmSplatNotI16AsmOperand; 39} 40def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; } 41def nImmSplatNotI32 : Operand<i32> { 42 let ParserMatchClass = nImmSplatNotI32AsmOperand; 43} 44def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } 45def nImmVMOVI32 : Operand<i32> { 46 let PrintMethod = "printVMOVModImmOperand"; 47 let ParserMatchClass = nImmVMOVI32AsmOperand; 48} 49 50class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To> 51 : AsmOperandClass { 52 let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate"; 53 let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">"; 54 let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands"; 55} 56 57class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To> 58 : AsmOperandClass { 59 let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate"; 60 let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">"; 61 let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands"; 62} 63 64class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> { 65 let PrintMethod = "printVMOVModImmOperand"; 66 let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>; 67} 68 69class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> { 70 let PrintMethod = "printVMOVModImmOperand"; 71 let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>; 72} 73 74def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } 75def nImmVMOVI32Neg : Operand<i32> { 76 let PrintMethod = "printVMOVModImmOperand"; 77 let ParserMatchClass = nImmVMOVI32NegAsmOperand; 78} 79def nImmVMOVF32 : Operand<i32> { 80 let PrintMethod = "printFPImmOperand"; 81 let ParserMatchClass = FPImmOperand; 82} 83def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } 84def nImmSplatI64 : Operand<i32> { 85 let PrintMethod = "printVMOVModImmOperand"; 86 let ParserMatchClass = nImmSplatI64AsmOperand; 87} 88 89def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } 90def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } 91def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } 92def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; } 93def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{ 94 return ((uint64_t)Imm) < 8; 95}]> { 96 let ParserMatchClass = VectorIndex8Operand; 97 let PrintMethod = "printVectorIndex"; 98 let MIOperandInfo = (ops i32imm); 99} 100def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{ 101 return ((uint64_t)Imm) < 4; 102}]> { 103 let ParserMatchClass = VectorIndex16Operand; 104 let PrintMethod = "printVectorIndex"; 105 let MIOperandInfo = (ops i32imm); 106} 107def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ 108 return ((uint64_t)Imm) < 2; 109}]> { 110 let ParserMatchClass = VectorIndex32Operand; 111 let PrintMethod = "printVectorIndex"; 112 let MIOperandInfo = (ops i32imm); 113} 114def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{ 115 return ((uint64_t)Imm) < 1; 116}]> { 117 let ParserMatchClass = VectorIndex64Operand; 118 let PrintMethod = "printVectorIndex"; 119 let MIOperandInfo = (ops i32imm); 120} 121 122// Register list of one D register. 123def VecListOneDAsmOperand : AsmOperandClass { 124 let Name = "VecListOneD"; 125 let ParserMethod = "parseVectorList"; 126 let RenderMethod = "addVecListOperands"; 127} 128def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { 129 let ParserMatchClass = VecListOneDAsmOperand; 130} 131// Register list of two sequential D registers. 132def VecListDPairAsmOperand : AsmOperandClass { 133 let Name = "VecListDPair"; 134 let ParserMethod = "parseVectorList"; 135 let RenderMethod = "addVecListOperands"; 136} 137def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> { 138 let ParserMatchClass = VecListDPairAsmOperand; 139} 140// Register list of three sequential D registers. 141def VecListThreeDAsmOperand : AsmOperandClass { 142 let Name = "VecListThreeD"; 143 let ParserMethod = "parseVectorList"; 144 let RenderMethod = "addVecListOperands"; 145} 146def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { 147 let ParserMatchClass = VecListThreeDAsmOperand; 148} 149// Register list of four sequential D registers. 150def VecListFourDAsmOperand : AsmOperandClass { 151 let Name = "VecListFourD"; 152 let ParserMethod = "parseVectorList"; 153 let RenderMethod = "addVecListOperands"; 154} 155def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { 156 let ParserMatchClass = VecListFourDAsmOperand; 157} 158// Register list of two D registers spaced by 2 (two sequential Q registers). 159def VecListDPairSpacedAsmOperand : AsmOperandClass { 160 let Name = "VecListDPairSpaced"; 161 let ParserMethod = "parseVectorList"; 162 let RenderMethod = "addVecListOperands"; 163} 164def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> { 165 let ParserMatchClass = VecListDPairSpacedAsmOperand; 166} 167// Register list of three D registers spaced by 2 (three Q registers). 168def VecListThreeQAsmOperand : AsmOperandClass { 169 let Name = "VecListThreeQ"; 170 let ParserMethod = "parseVectorList"; 171 let RenderMethod = "addVecListOperands"; 172} 173def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> { 174 let ParserMatchClass = VecListThreeQAsmOperand; 175} 176// Register list of three D registers spaced by 2 (three Q registers). 177def VecListFourQAsmOperand : AsmOperandClass { 178 let Name = "VecListFourQ"; 179 let ParserMethod = "parseVectorList"; 180 let RenderMethod = "addVecListOperands"; 181} 182def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> { 183 let ParserMatchClass = VecListFourQAsmOperand; 184} 185 186// Register list of one D register, with "all lanes" subscripting. 187def VecListOneDAllLanesAsmOperand : AsmOperandClass { 188 let Name = "VecListOneDAllLanes"; 189 let ParserMethod = "parseVectorList"; 190 let RenderMethod = "addVecListOperands"; 191} 192def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { 193 let ParserMatchClass = VecListOneDAllLanesAsmOperand; 194} 195// Register list of two D registers, with "all lanes" subscripting. 196def VecListDPairAllLanesAsmOperand : AsmOperandClass { 197 let Name = "VecListDPairAllLanes"; 198 let ParserMethod = "parseVectorList"; 199 let RenderMethod = "addVecListOperands"; 200} 201def VecListDPairAllLanes : RegisterOperand<DPair, 202 "printVectorListTwoAllLanes"> { 203 let ParserMatchClass = VecListDPairAllLanesAsmOperand; 204} 205// Register list of two D registers spaced by 2 (two sequential Q registers). 206def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass { 207 let Name = "VecListDPairSpacedAllLanes"; 208 let ParserMethod = "parseVectorList"; 209 let RenderMethod = "addVecListOperands"; 210} 211def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc, 212 "printVectorListTwoSpacedAllLanes"> { 213 let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand; 214} 215// Register list of three D registers, with "all lanes" subscripting. 216def VecListThreeDAllLanesAsmOperand : AsmOperandClass { 217 let Name = "VecListThreeDAllLanes"; 218 let ParserMethod = "parseVectorList"; 219 let RenderMethod = "addVecListOperands"; 220} 221def VecListThreeDAllLanes : RegisterOperand<DPR, 222 "printVectorListThreeAllLanes"> { 223 let ParserMatchClass = VecListThreeDAllLanesAsmOperand; 224} 225// Register list of three D registers spaced by 2 (three sequential Q regs). 226def VecListThreeQAllLanesAsmOperand : AsmOperandClass { 227 let Name = "VecListThreeQAllLanes"; 228 let ParserMethod = "parseVectorList"; 229 let RenderMethod = "addVecListOperands"; 230} 231def VecListThreeQAllLanes : RegisterOperand<DPR, 232 "printVectorListThreeSpacedAllLanes"> { 233 let ParserMatchClass = VecListThreeQAllLanesAsmOperand; 234} 235// Register list of four D registers, with "all lanes" subscripting. 236def VecListFourDAllLanesAsmOperand : AsmOperandClass { 237 let Name = "VecListFourDAllLanes"; 238 let ParserMethod = "parseVectorList"; 239 let RenderMethod = "addVecListOperands"; 240} 241def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> { 242 let ParserMatchClass = VecListFourDAllLanesAsmOperand; 243} 244// Register list of four D registers spaced by 2 (four sequential Q regs). 245def VecListFourQAllLanesAsmOperand : AsmOperandClass { 246 let Name = "VecListFourQAllLanes"; 247 let ParserMethod = "parseVectorList"; 248 let RenderMethod = "addVecListOperands"; 249} 250def VecListFourQAllLanes : RegisterOperand<DPR, 251 "printVectorListFourSpacedAllLanes"> { 252 let ParserMatchClass = VecListFourQAllLanesAsmOperand; 253} 254 255 256// Register list of one D register, with byte lane subscripting. 257def VecListOneDByteIndexAsmOperand : AsmOperandClass { 258 let Name = "VecListOneDByteIndexed"; 259 let ParserMethod = "parseVectorList"; 260 let RenderMethod = "addVecListIndexedOperands"; 261} 262def VecListOneDByteIndexed : Operand<i32> { 263 let ParserMatchClass = VecListOneDByteIndexAsmOperand; 264 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 265} 266// ...with half-word lane subscripting. 267def VecListOneDHWordIndexAsmOperand : AsmOperandClass { 268 let Name = "VecListOneDHWordIndexed"; 269 let ParserMethod = "parseVectorList"; 270 let RenderMethod = "addVecListIndexedOperands"; 271} 272def VecListOneDHWordIndexed : Operand<i32> { 273 let ParserMatchClass = VecListOneDHWordIndexAsmOperand; 274 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 275} 276// ...with word lane subscripting. 277def VecListOneDWordIndexAsmOperand : AsmOperandClass { 278 let Name = "VecListOneDWordIndexed"; 279 let ParserMethod = "parseVectorList"; 280 let RenderMethod = "addVecListIndexedOperands"; 281} 282def VecListOneDWordIndexed : Operand<i32> { 283 let ParserMatchClass = VecListOneDWordIndexAsmOperand; 284 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 285} 286 287// Register list of two D registers with byte lane subscripting. 288def VecListTwoDByteIndexAsmOperand : AsmOperandClass { 289 let Name = "VecListTwoDByteIndexed"; 290 let ParserMethod = "parseVectorList"; 291 let RenderMethod = "addVecListIndexedOperands"; 292} 293def VecListTwoDByteIndexed : Operand<i32> { 294 let ParserMatchClass = VecListTwoDByteIndexAsmOperand; 295 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 296} 297// ...with half-word lane subscripting. 298def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { 299 let Name = "VecListTwoDHWordIndexed"; 300 let ParserMethod = "parseVectorList"; 301 let RenderMethod = "addVecListIndexedOperands"; 302} 303def VecListTwoDHWordIndexed : Operand<i32> { 304 let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; 305 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 306} 307// ...with word lane subscripting. 308def VecListTwoDWordIndexAsmOperand : AsmOperandClass { 309 let Name = "VecListTwoDWordIndexed"; 310 let ParserMethod = "parseVectorList"; 311 let RenderMethod = "addVecListIndexedOperands"; 312} 313def VecListTwoDWordIndexed : Operand<i32> { 314 let ParserMatchClass = VecListTwoDWordIndexAsmOperand; 315 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 316} 317// Register list of two Q registers with half-word lane subscripting. 318def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { 319 let Name = "VecListTwoQHWordIndexed"; 320 let ParserMethod = "parseVectorList"; 321 let RenderMethod = "addVecListIndexedOperands"; 322} 323def VecListTwoQHWordIndexed : Operand<i32> { 324 let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; 325 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 326} 327// ...with word lane subscripting. 328def VecListTwoQWordIndexAsmOperand : AsmOperandClass { 329 let Name = "VecListTwoQWordIndexed"; 330 let ParserMethod = "parseVectorList"; 331 let RenderMethod = "addVecListIndexedOperands"; 332} 333def VecListTwoQWordIndexed : Operand<i32> { 334 let ParserMatchClass = VecListTwoQWordIndexAsmOperand; 335 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 336} 337 338 339// Register list of three D registers with byte lane subscripting. 340def VecListThreeDByteIndexAsmOperand : AsmOperandClass { 341 let Name = "VecListThreeDByteIndexed"; 342 let ParserMethod = "parseVectorList"; 343 let RenderMethod = "addVecListIndexedOperands"; 344} 345def VecListThreeDByteIndexed : Operand<i32> { 346 let ParserMatchClass = VecListThreeDByteIndexAsmOperand; 347 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 348} 349// ...with half-word lane subscripting. 350def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { 351 let Name = "VecListThreeDHWordIndexed"; 352 let ParserMethod = "parseVectorList"; 353 let RenderMethod = "addVecListIndexedOperands"; 354} 355def VecListThreeDHWordIndexed : Operand<i32> { 356 let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; 357 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 358} 359// ...with word lane subscripting. 360def VecListThreeDWordIndexAsmOperand : AsmOperandClass { 361 let Name = "VecListThreeDWordIndexed"; 362 let ParserMethod = "parseVectorList"; 363 let RenderMethod = "addVecListIndexedOperands"; 364} 365def VecListThreeDWordIndexed : Operand<i32> { 366 let ParserMatchClass = VecListThreeDWordIndexAsmOperand; 367 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 368} 369// Register list of three Q registers with half-word lane subscripting. 370def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { 371 let Name = "VecListThreeQHWordIndexed"; 372 let ParserMethod = "parseVectorList"; 373 let RenderMethod = "addVecListIndexedOperands"; 374} 375def VecListThreeQHWordIndexed : Operand<i32> { 376 let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; 377 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 378} 379// ...with word lane subscripting. 380def VecListThreeQWordIndexAsmOperand : AsmOperandClass { 381 let Name = "VecListThreeQWordIndexed"; 382 let ParserMethod = "parseVectorList"; 383 let RenderMethod = "addVecListIndexedOperands"; 384} 385def VecListThreeQWordIndexed : Operand<i32> { 386 let ParserMatchClass = VecListThreeQWordIndexAsmOperand; 387 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 388} 389 390// Register list of four D registers with byte lane subscripting. 391def VecListFourDByteIndexAsmOperand : AsmOperandClass { 392 let Name = "VecListFourDByteIndexed"; 393 let ParserMethod = "parseVectorList"; 394 let RenderMethod = "addVecListIndexedOperands"; 395} 396def VecListFourDByteIndexed : Operand<i32> { 397 let ParserMatchClass = VecListFourDByteIndexAsmOperand; 398 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 399} 400// ...with half-word lane subscripting. 401def VecListFourDHWordIndexAsmOperand : AsmOperandClass { 402 let Name = "VecListFourDHWordIndexed"; 403 let ParserMethod = "parseVectorList"; 404 let RenderMethod = "addVecListIndexedOperands"; 405} 406def VecListFourDHWordIndexed : Operand<i32> { 407 let ParserMatchClass = VecListFourDHWordIndexAsmOperand; 408 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 409} 410// ...with word lane subscripting. 411def VecListFourDWordIndexAsmOperand : AsmOperandClass { 412 let Name = "VecListFourDWordIndexed"; 413 let ParserMethod = "parseVectorList"; 414 let RenderMethod = "addVecListIndexedOperands"; 415} 416def VecListFourDWordIndexed : Operand<i32> { 417 let ParserMatchClass = VecListFourDWordIndexAsmOperand; 418 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 419} 420// Register list of four Q registers with half-word lane subscripting. 421def VecListFourQHWordIndexAsmOperand : AsmOperandClass { 422 let Name = "VecListFourQHWordIndexed"; 423 let ParserMethod = "parseVectorList"; 424 let RenderMethod = "addVecListIndexedOperands"; 425} 426def VecListFourQHWordIndexed : Operand<i32> { 427 let ParserMatchClass = VecListFourQHWordIndexAsmOperand; 428 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 429} 430// ...with word lane subscripting. 431def VecListFourQWordIndexAsmOperand : AsmOperandClass { 432 let Name = "VecListFourQWordIndexed"; 433 let ParserMethod = "parseVectorList"; 434 let RenderMethod = "addVecListIndexedOperands"; 435} 436def VecListFourQWordIndexed : Operand<i32> { 437 let ParserMatchClass = VecListFourQWordIndexAsmOperand; 438 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 439} 440 441def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 442 return cast<LoadSDNode>(N)->getAlignment() >= 8; 443}]>; 444def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr), 445 (store node:$val, node:$ptr), [{ 446 return cast<StoreSDNode>(N)->getAlignment() >= 8; 447}]>; 448def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 449 return cast<LoadSDNode>(N)->getAlignment() == 4; 450}]>; 451def word_alignedstore : PatFrag<(ops node:$val, node:$ptr), 452 (store node:$val, node:$ptr), [{ 453 return cast<StoreSDNode>(N)->getAlignment() == 4; 454}]>; 455def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 456 return cast<LoadSDNode>(N)->getAlignment() == 2; 457}]>; 458def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), 459 (store node:$val, node:$ptr), [{ 460 return cast<StoreSDNode>(N)->getAlignment() == 2; 461}]>; 462def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 463 return cast<LoadSDNode>(N)->getAlignment() == 1; 464}]>; 465def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), 466 (store node:$val, node:$ptr), [{ 467 return cast<StoreSDNode>(N)->getAlignment() == 1; 468}]>; 469def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 470 return cast<LoadSDNode>(N)->getAlignment() < 4; 471}]>; 472def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), 473 (store node:$val, node:$ptr), [{ 474 return cast<StoreSDNode>(N)->getAlignment() < 4; 475}]>; 476 477//===----------------------------------------------------------------------===// 478// NEON-specific DAG Nodes. 479//===----------------------------------------------------------------------===// 480 481def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; 482def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; 483 484// Types for vector shift by immediates. The "SHX" version is for long and 485// narrow operations where the source and destination vectors have different 486// types. The "SHINS" version is for shift and insert operations. 487def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 488 SDTCisVT<2, i32>]>; 489def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, 490 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; 491 492def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>; 493 494def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>; 495def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>; 496def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>; 497 498def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>; 499def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>; 500def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>; 501def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>; 502def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>; 503def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>; 504 505def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>; 506def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>; 507def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>; 508 509def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; 510def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; 511 512def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, 513 SDTCisVT<2, i32>]>; 514def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; 515def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; 516 517def NEONvbsl : SDNode<"ARMISD::VBSL", 518 SDTypeProfile<1, 3, [SDTCisVec<0>, 519 SDTCisSameAs<0, 1>, 520 SDTCisSameAs<0, 2>, 521 SDTCisSameAs<0, 3>]>>; 522 523def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, 524 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; 525def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; 526 527def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, 528 SDTCisSameAs<0, 2>, 529 SDTCisSameAs<0, 3>]>; 530def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; 531def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; 532def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; 533 534def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 535 SDTCisSameAs<1, 2>]>; 536def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; 537def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; 538 539def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, 540 SDTCisVT<2, v8i8>]>; 541def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, 542 SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; 543def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; 544def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; 545 546 547def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{ 548 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); 549 unsigned EltBits = 0; 550 uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); 551 return (EltBits == 32 && EltVal == 0); 552}]>; 553 554def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{ 555 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); 556 unsigned EltBits = 0; 557 uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); 558 return (EltBits == 8 && EltVal == 0xff); 559}]>; 560 561//===----------------------------------------------------------------------===// 562// NEON load / store instructions 563//===----------------------------------------------------------------------===// 564 565// Use VLDM to load a Q register as a D register pair. 566// This is a pseudo instruction that is expanded to VLDMD after reg alloc. 567def VLDMQIA 568 : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), 569 IIC_fpLoad_m, "", 570 [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>; 571 572// Use VSTM to store a Q register as a D register pair. 573// This is a pseudo instruction that is expanded to VSTMD after reg alloc. 574def VSTMQIA 575 : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), 576 IIC_fpStore_m, "", 577 [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>; 578 579// Classes for VLD* pseudo-instructions with multi-register operands. 580// These are expanded to real instructions after register allocation. 581class VLDQPseudo<InstrItinClass itin> 582 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 583class VLDQWBPseudo<InstrItinClass itin> 584 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 585 (ins addrmode6:$addr, am6offset:$offset), itin, 586 "$addr.addr = $wb">; 587class VLDQWBfixedPseudo<InstrItinClass itin> 588 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 589 (ins addrmode6:$addr), itin, 590 "$addr.addr = $wb">; 591class VLDQWBregisterPseudo<InstrItinClass itin> 592 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 593 (ins addrmode6:$addr, rGPR:$offset), itin, 594 "$addr.addr = $wb">; 595 596class VLDQQPseudo<InstrItinClass itin> 597 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 598class VLDQQWBPseudo<InstrItinClass itin> 599 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 600 (ins addrmode6:$addr, am6offset:$offset), itin, 601 "$addr.addr = $wb">; 602class VLDQQWBfixedPseudo<InstrItinClass itin> 603 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 604 (ins addrmode6:$addr), itin, 605 "$addr.addr = $wb">; 606class VLDQQWBregisterPseudo<InstrItinClass itin> 607 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 608 (ins addrmode6:$addr, rGPR:$offset), itin, 609 "$addr.addr = $wb">; 610 611 612class VLDQQQQPseudo<InstrItinClass itin> 613 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 614 "$src = $dst">; 615class VLDQQQQWBPseudo<InstrItinClass itin> 616 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 617 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 618 "$addr.addr = $wb, $src = $dst">; 619 620let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 621 622// VLD1 : Vector Load (multiple single elements) 623class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> 624 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 625 (ins AddrMode:$Rn), IIC_VLD1, 626 "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { 627 let Rm = 0b1111; 628 let Inst{4} = Rn{4}; 629 let DecoderMethod = "DecodeVLDST1Instruction"; 630} 631class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode> 632 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), 633 (ins AddrMode:$Rn), IIC_VLD1x2, 634 "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { 635 let Rm = 0b1111; 636 let Inst{5-4} = Rn{5-4}; 637 let DecoderMethod = "DecodeVLDST1Instruction"; 638} 639 640def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; 641def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; 642def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; 643def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; 644 645def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; 646def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; 647def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; 648def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; 649 650// ...with address register writeback: 651multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 652 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 653 (ins AddrMode:$Rn), IIC_VLD1u, 654 "vld1", Dt, "$Vd, $Rn!", 655 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 656 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 657 let Inst{4} = Rn{4}; 658 let DecoderMethod = "DecodeVLDST1Instruction"; 659 } 660 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 661 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, 662 "vld1", Dt, "$Vd, $Rn, $Rm", 663 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 664 let Inst{4} = Rn{4}; 665 let DecoderMethod = "DecodeVLDST1Instruction"; 666 } 667} 668multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 669 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 670 (ins AddrMode:$Rn), IIC_VLD1x2u, 671 "vld1", Dt, "$Vd, $Rn!", 672 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 673 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 674 let Inst{5-4} = Rn{5-4}; 675 let DecoderMethod = "DecodeVLDST1Instruction"; 676 } 677 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 678 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 679 "vld1", Dt, "$Vd, $Rn, $Rm", 680 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 681 let Inst{5-4} = Rn{5-4}; 682 let DecoderMethod = "DecodeVLDST1Instruction"; 683 } 684} 685 686defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; 687defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; 688defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; 689defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; 690defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 691defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 692defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 693defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 694 695// ...with 3 registers 696class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode> 697 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), 698 (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, 699 "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { 700 let Rm = 0b1111; 701 let Inst{4} = Rn{4}; 702 let DecoderMethod = "DecodeVLDST1Instruction"; 703} 704multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 705 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 706 (ins AddrMode:$Rn), IIC_VLD1x2u, 707 "vld1", Dt, "$Vd, $Rn!", 708 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 709 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 710 let Inst{4} = Rn{4}; 711 let DecoderMethod = "DecodeVLDST1Instruction"; 712 } 713 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 714 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 715 "vld1", Dt, "$Vd, $Rn, $Rm", 716 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 717 let Inst{4} = Rn{4}; 718 let DecoderMethod = "DecodeVLDST1Instruction"; 719 } 720} 721 722def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; 723def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; 724def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; 725def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; 726 727defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; 728defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; 729defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; 730defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; 731 732def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 733def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 734def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 735def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 736def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 737def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 738 739def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 740def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 741def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 742def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 743def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 744def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 745def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 746def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>; 747 748// ...with 4 registers 749class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> 750 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 751 (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, 752 "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { 753 let Rm = 0b1111; 754 let Inst{5-4} = Rn{5-4}; 755 let DecoderMethod = "DecodeVLDST1Instruction"; 756} 757multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 758 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 759 (ins AddrMode:$Rn), IIC_VLD1x2u, 760 "vld1", Dt, "$Vd, $Rn!", 761 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 762 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 763 let Inst{5-4} = Rn{5-4}; 764 let DecoderMethod = "DecodeVLDST1Instruction"; 765 } 766 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), 767 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 768 "vld1", Dt, "$Vd, $Rn, $Rm", 769 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 770 let Inst{5-4} = Rn{5-4}; 771 let DecoderMethod = "DecodeVLDST1Instruction"; 772 } 773} 774 775def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 776def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 777def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 778def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 779 780defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 781defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 782defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 783defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 784 785def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 786def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 787def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 788def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 789def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 790def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 791 792def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 793def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 794def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 795def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 796def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 797def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 798def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 799def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 800 801// VLD2 : Vector Load (multiple 2-element structures) 802class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 803 InstrItinClass itin, Operand AddrMode> 804 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), 805 (ins AddrMode:$Rn), itin, 806 "vld2", Dt, "$Vd, $Rn", "", []> { 807 let Rm = 0b1111; 808 let Inst{5-4} = Rn{5-4}; 809 let DecoderMethod = "DecodeVLDST2Instruction"; 810} 811 812def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, 813 addrmode6align64or128>, Sched<[WriteVLD2]>; 814def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, 815 addrmode6align64or128>, Sched<[WriteVLD2]>; 816def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, 817 addrmode6align64or128>, Sched<[WriteVLD2]>; 818 819def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, 820 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 821def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, 822 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 823def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, 824 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 825 826def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 827def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 828def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 829 830// ...with address register writeback: 831multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, 832 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { 833 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 834 (ins AddrMode:$Rn), itin, 835 "vld2", Dt, "$Vd, $Rn!", 836 "$Rn.addr = $wb", []> { 837 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 838 let Inst{5-4} = Rn{5-4}; 839 let DecoderMethod = "DecodeVLDST2Instruction"; 840 } 841 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 842 (ins AddrMode:$Rn, rGPR:$Rm), itin, 843 "vld2", Dt, "$Vd, $Rn, $Rm", 844 "$Rn.addr = $wb", []> { 845 let Inst{5-4} = Rn{5-4}; 846 let DecoderMethod = "DecodeVLDST2Instruction"; 847 } 848} 849 850defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, 851 addrmode6align64or128>, Sched<[WriteVLD2]>; 852defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, 853 addrmode6align64or128>, Sched<[WriteVLD2]>; 854defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, 855 addrmode6align64or128>, Sched<[WriteVLD2]>; 856 857defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, 858 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 859defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, 860 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 861defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, 862 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 863 864def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 865def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 866def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 867def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 868def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 869def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 870 871// ...with double-spaced registers 872def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 873 addrmode6align64or128>, Sched<[WriteVLD2]>; 874def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 875 addrmode6align64or128>, Sched<[WriteVLD2]>; 876def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 877 addrmode6align64or128>, Sched<[WriteVLD2]>; 878defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 879 addrmode6align64or128>, Sched<[WriteVLD2]>; 880defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 881 addrmode6align64or128>, Sched<[WriteVLD2]>; 882defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 883 addrmode6align64or128>, Sched<[WriteVLD2]>; 884 885// VLD3 : Vector Load (multiple 3-element structures) 886class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 887 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 888 (ins addrmode6:$Rn), IIC_VLD3, 889 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { 890 let Rm = 0b1111; 891 let Inst{4} = Rn{4}; 892 let DecoderMethod = "DecodeVLDST3Instruction"; 893} 894 895def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 896def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 897def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 898 899def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 900def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 901def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 902 903// ...with address register writeback: 904class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 905 : NLdSt<0, 0b10, op11_8, op7_4, 906 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 907 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 908 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 909 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 910 let Inst{4} = Rn{4}; 911 let DecoderMethod = "DecodeVLDST3Instruction"; 912} 913 914def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 915def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 916def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 917 918def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 919def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 920def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 921 922// ...with double-spaced registers: 923def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 924def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 925def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 926def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 927def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 928def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 929 930def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 931def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 932def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 933 934// ...alternate versions to be allocated odd register numbers: 935def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 936def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 937def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 938 939def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 940def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 941def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 942 943// VLD4 : Vector Load (multiple 4-element structures) 944class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 945 : NLdSt<0, 0b10, op11_8, op7_4, 946 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 947 (ins addrmode6:$Rn), IIC_VLD4, 948 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, 949 Sched<[WriteVLD4]> { 950 let Rm = 0b1111; 951 let Inst{5-4} = Rn{5-4}; 952 let DecoderMethod = "DecodeVLDST4Instruction"; 953} 954 955def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 956def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 957def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 958 959def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 960def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 961def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 962 963// ...with address register writeback: 964class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 965 : NLdSt<0, 0b10, op11_8, op7_4, 966 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 967 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 968 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 969 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 970 let Inst{5-4} = Rn{5-4}; 971 let DecoderMethod = "DecodeVLDST4Instruction"; 972} 973 974def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 975def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 976def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 977 978def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 979def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 980def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 981 982// ...with double-spaced registers: 983def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 984def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 985def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 986def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 987def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 988def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 989 990def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 991def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 992def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 993 994// ...alternate versions to be allocated odd register numbers: 995def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 996def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 997def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 998 999def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1000def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1001def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1002 1003} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1004 1005// Classes for VLD*LN pseudo-instructions with multi-register operands. 1006// These are expanded to real instructions after register allocation. 1007class VLDQLNPseudo<InstrItinClass itin> 1008 : PseudoNLdSt<(outs QPR:$dst), 1009 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1010 itin, "$src = $dst">; 1011class VLDQLNWBPseudo<InstrItinClass itin> 1012 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 1013 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1014 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1015class VLDQQLNPseudo<InstrItinClass itin> 1016 : PseudoNLdSt<(outs QQPR:$dst), 1017 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1018 itin, "$src = $dst">; 1019class VLDQQLNWBPseudo<InstrItinClass itin> 1020 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 1021 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1022 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1023class VLDQQQQLNPseudo<InstrItinClass itin> 1024 : PseudoNLdSt<(outs QQQQPR:$dst), 1025 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1026 itin, "$src = $dst">; 1027class VLDQQQQLNWBPseudo<InstrItinClass itin> 1028 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 1029 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1030 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1031 1032// VLD1LN : Vector Load (single element to one lane) 1033class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1034 PatFrag LoadOp> 1035 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1036 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 1037 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1038 "$src = $Vd", 1039 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1040 (i32 (LoadOp addrmode6:$Rn)), 1041 imm:$lane))]> { 1042 let Rm = 0b1111; 1043 let DecoderMethod = "DecodeVLD1LN"; 1044} 1045class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1046 PatFrag LoadOp> 1047 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1048 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 1049 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1050 "$src = $Vd", 1051 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1052 (i32 (LoadOp addrmode6oneL32:$Rn)), 1053 imm:$lane))]>, Sched<[WriteVLD1]> { 1054 let Rm = 0b1111; 1055 let DecoderMethod = "DecodeVLD1LN"; 1056} 1057class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>, 1058 Sched<[WriteVLD1]> { 1059 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 1060 (i32 (LoadOp addrmode6:$addr)), 1061 imm:$lane))]; 1062} 1063 1064def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 1065 let Inst{7-5} = lane{2-0}; 1066} 1067def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 1068 let Inst{7-6} = lane{1-0}; 1069 let Inst{5-4} = Rn{5-4}; 1070} 1071def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 1072 let Inst{7} = lane{0}; 1073 let Inst{5-4} = Rn{5-4}; 1074} 1075 1076def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 1077def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 1078def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 1079 1080let Predicates = [HasNEON] in { 1081def : Pat<(vector_insert (v4f16 DPR:$src), 1082 (f16 (load addrmode6:$addr)), imm:$lane), 1083 (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>; 1084def : Pat<(vector_insert (v8f16 QPR:$src), 1085 (f16 (load addrmode6:$addr)), imm:$lane), 1086 (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1087def : Pat<(vector_insert (v2f32 DPR:$src), 1088 (f32 (load addrmode6:$addr)), imm:$lane), 1089 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1090def : Pat<(vector_insert (v4f32 QPR:$src), 1091 (f32 (load addrmode6:$addr)), imm:$lane), 1092 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1093 1094// A 64-bit subvector insert to the first 128-bit vector position 1095// is a subregister copy that needs no instruction. 1096def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)), 1097 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1098def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)), 1099 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1100def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)), 1101 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1102def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)), 1103 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1104def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)), 1105 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1106def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)), 1107 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1108} 1109 1110 1111let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1112 1113// ...with address register writeback: 1114class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1115 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1116 (ins addrmode6:$Rn, am6offset:$Rm, 1117 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1118 "\\{$Vd[$lane]\\}, $Rn$Rm", 1119 "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1120 let DecoderMethod = "DecodeVLD1LN"; 1121} 1122 1123def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 1124 let Inst{7-5} = lane{2-0}; 1125} 1126def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1127 let Inst{7-6} = lane{1-0}; 1128 let Inst{4} = Rn{4}; 1129} 1130def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1131 let Inst{7} = lane{0}; 1132 let Inst{5} = Rn{4}; 1133 let Inst{4} = Rn{4}; 1134} 1135 1136def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1137def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1138def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1139 1140// VLD2LN : Vector Load (single 2-element structure to one lane) 1141class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1142 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1143 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1144 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1145 "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { 1146 let Rm = 0b1111; 1147 let Inst{4} = Rn{4}; 1148 let DecoderMethod = "DecodeVLD2LN"; 1149} 1150 1151def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1152 let Inst{7-5} = lane{2-0}; 1153} 1154def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1155 let Inst{7-6} = lane{1-0}; 1156} 1157def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1158 let Inst{7} = lane{0}; 1159} 1160 1161def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1162def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1163def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1164 1165// ...with double-spaced registers: 1166def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 1167 let Inst{7-6} = lane{1-0}; 1168} 1169def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 1170 let Inst{7} = lane{0}; 1171} 1172 1173def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1174def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1175 1176// ...with address register writeback: 1177class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1178 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 1179 (ins addrmode6:$Rn, am6offset:$Rm, 1180 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 1181 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 1182 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 1183 let Inst{4} = Rn{4}; 1184 let DecoderMethod = "DecodeVLD2LN"; 1185} 1186 1187def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { 1188 let Inst{7-5} = lane{2-0}; 1189} 1190def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 1191 let Inst{7-6} = lane{1-0}; 1192} 1193def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { 1194 let Inst{7} = lane{0}; 1195} 1196 1197def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1198def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1199def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1200 1201def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 1202 let Inst{7-6} = lane{1-0}; 1203} 1204def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 1205 let Inst{7} = lane{0}; 1206} 1207 1208def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1209def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1210 1211// VLD3LN : Vector Load (single 3-element structure to one lane) 1212class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1213 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1214 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 1215 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 1216 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 1217 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { 1218 let Rm = 0b1111; 1219 let DecoderMethod = "DecodeVLD3LN"; 1220} 1221 1222def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 1223 let Inst{7-5} = lane{2-0}; 1224} 1225def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 1226 let Inst{7-6} = lane{1-0}; 1227} 1228def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 1229 let Inst{7} = lane{0}; 1230} 1231 1232def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1233def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1234def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1235 1236// ...with double-spaced registers: 1237def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 1238 let Inst{7-6} = lane{1-0}; 1239} 1240def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 1241 let Inst{7} = lane{0}; 1242} 1243 1244def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1245def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1246 1247// ...with address register writeback: 1248class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1249 : NLdStLn<1, 0b10, op11_8, op7_4, 1250 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1251 (ins addrmode6:$Rn, am6offset:$Rm, 1252 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 1253 IIC_VLD3lnu, "vld3", Dt, 1254 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 1255 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 1256 []>, Sched<[WriteVLD2]> { 1257 let DecoderMethod = "DecodeVLD3LN"; 1258} 1259 1260def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 1261 let Inst{7-5} = lane{2-0}; 1262} 1263def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 1264 let Inst{7-6} = lane{1-0}; 1265} 1266def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 1267 let Inst{7} = lane{0}; 1268} 1269 1270def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1271def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1272def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1273 1274def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 1275 let Inst{7-6} = lane{1-0}; 1276} 1277def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 1278 let Inst{7} = lane{0}; 1279} 1280 1281def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1282def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1283 1284// VLD4LN : Vector Load (single 4-element structure to one lane) 1285class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1286 : NLdStLn<1, 0b10, op11_8, op7_4, 1287 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1288 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 1289 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 1290 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 1291 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, 1292 Sched<[WriteVLD2]> { 1293 let Rm = 0b1111; 1294 let Inst{4} = Rn{4}; 1295 let DecoderMethod = "DecodeVLD4LN"; 1296} 1297 1298def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 1299 let Inst{7-5} = lane{2-0}; 1300} 1301def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 1302 let Inst{7-6} = lane{1-0}; 1303} 1304def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 1305 let Inst{7} = lane{0}; 1306 let Inst{5} = Rn{5}; 1307} 1308 1309def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1310def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1311def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1312 1313// ...with double-spaced registers: 1314def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { 1315 let Inst{7-6} = lane{1-0}; 1316} 1317def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { 1318 let Inst{7} = lane{0}; 1319 let Inst{5} = Rn{5}; 1320} 1321 1322def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1323def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1324 1325// ...with address register writeback: 1326class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1327 : NLdStLn<1, 0b10, op11_8, op7_4, 1328 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1329 (ins addrmode6:$Rn, am6offset:$Rm, 1330 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 1331 IIC_VLD4lnu, "vld4", Dt, 1332"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", 1333"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", 1334 []> { 1335 let Inst{4} = Rn{4}; 1336 let DecoderMethod = "DecodeVLD4LN" ; 1337} 1338 1339def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { 1340 let Inst{7-5} = lane{2-0}; 1341} 1342def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { 1343 let Inst{7-6} = lane{1-0}; 1344} 1345def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { 1346 let Inst{7} = lane{0}; 1347 let Inst{5} = Rn{5}; 1348} 1349 1350def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1351def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1352def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1353 1354def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { 1355 let Inst{7-6} = lane{1-0}; 1356} 1357def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { 1358 let Inst{7} = lane{0}; 1359 let Inst{5} = Rn{5}; 1360} 1361 1362def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1363def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1364 1365} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1366 1367// VLD1DUP : Vector Load (single element to all lanes) 1368class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1369 Operand AddrMode> 1370 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), 1371 (ins AddrMode:$Rn), 1372 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", 1373 [(set VecListOneDAllLanes:$Vd, 1374 (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>, 1375 Sched<[WriteVLD2]> { 1376 let Rm = 0b1111; 1377 let Inst{4} = Rn{4}; 1378 let DecoderMethod = "DecodeVLD1DupInstruction"; 1379} 1380def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, 1381 addrmode6dupalignNone>; 1382def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, 1383 addrmode6dupalign16>; 1384def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, 1385 addrmode6dupalign32>; 1386 1387let Predicates = [HasNEON] in { 1388def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))), 1389 (VLD1DUPd32 addrmode6:$addr)>; 1390} 1391 1392class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1393 Operand AddrMode> 1394 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), 1395 (ins AddrMode:$Rn), IIC_VLD1dup, 1396 "vld1", Dt, "$Vd, $Rn", "", 1397 [(set VecListDPairAllLanes:$Vd, 1398 (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> { 1399 let Rm = 0b1111; 1400 let Inst{4} = Rn{4}; 1401 let DecoderMethod = "DecodeVLD1DupInstruction"; 1402} 1403 1404def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, 1405 addrmode6dupalignNone>; 1406def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, 1407 addrmode6dupalign16>; 1408def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, 1409 addrmode6dupalign32>; 1410 1411let Predicates = [HasNEON] in { 1412def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))), 1413 (VLD1DUPq32 addrmode6:$addr)>; 1414} 1415 1416let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1417// ...with address register writeback: 1418multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1419 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1420 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1421 (ins AddrMode:$Rn), IIC_VLD1dupu, 1422 "vld1", Dt, "$Vd, $Rn!", 1423 "$Rn.addr = $wb", []> { 1424 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1425 let Inst{4} = Rn{4}; 1426 let DecoderMethod = "DecodeVLD1DupInstruction"; 1427 } 1428 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1429 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1430 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1431 "vld1", Dt, "$Vd, $Rn, $Rm", 1432 "$Rn.addr = $wb", []> { 1433 let Inst{4} = Rn{4}; 1434 let DecoderMethod = "DecodeVLD1DupInstruction"; 1435 } 1436} 1437multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1438 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1439 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1440 (ins AddrMode:$Rn), IIC_VLD1dupu, 1441 "vld1", Dt, "$Vd, $Rn!", 1442 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1443 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1444 let Inst{4} = Rn{4}; 1445 let DecoderMethod = "DecodeVLD1DupInstruction"; 1446 } 1447 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1448 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1449 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1450 "vld1", Dt, "$Vd, $Rn, $Rm", 1451 "$Rn.addr = $wb", []> { 1452 let Inst{4} = Rn{4}; 1453 let DecoderMethod = "DecodeVLD1DupInstruction"; 1454 } 1455} 1456 1457defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; 1458defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; 1459defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; 1460 1461defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; 1462defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; 1463defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; 1464 1465// VLD2DUP : Vector Load (single 2-element structure to all lanes) 1466class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> 1467 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), 1468 (ins AddrMode:$Rn), IIC_VLD2dup, 1469 "vld2", Dt, "$Vd, $Rn", "", []> { 1470 let Rm = 0b1111; 1471 let Inst{4} = Rn{4}; 1472 let DecoderMethod = "DecodeVLD2DupInstruction"; 1473} 1474 1475def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, 1476 addrmode6dupalign16>; 1477def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, 1478 addrmode6dupalign32>; 1479def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, 1480 addrmode6dupalign64>; 1481 1482// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or 1483// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". 1484// ...with double-spaced registers 1485def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, 1486 addrmode6dupalign16>; 1487def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, 1488 addrmode6dupalign32>; 1489def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, 1490 addrmode6dupalign64>; 1491 1492def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1493def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1494def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1495def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1496def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1497def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>; 1498 1499// ...with address register writeback: 1500multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy, 1501 Operand AddrMode> { 1502 def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, 1503 (outs VdTy:$Vd, GPR:$wb), 1504 (ins AddrMode:$Rn), IIC_VLD2dupu, 1505 "vld2", Dt, "$Vd, $Rn!", 1506 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1507 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1508 let Inst{4} = Rn{4}; 1509 let DecoderMethod = "DecodeVLD2DupInstruction"; 1510 } 1511 def _register : NLdSt<1, 0b10, 0b1101, op7_4, 1512 (outs VdTy:$Vd, GPR:$wb), 1513 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, 1514 "vld2", Dt, "$Vd, $Rn, $Rm", 1515 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1516 let Inst{4} = Rn{4}; 1517 let DecoderMethod = "DecodeVLD2DupInstruction"; 1518 } 1519} 1520 1521defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, 1522 addrmode6dupalign16>; 1523defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, 1524 addrmode6dupalign32>; 1525defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, 1526 addrmode6dupalign64>; 1527 1528defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, 1529 addrmode6dupalign16>; 1530defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, 1531 addrmode6dupalign32>; 1532defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, 1533 addrmode6dupalign64>; 1534 1535// VLD3DUP : Vector Load (single 3-element structure to all lanes) 1536class VLD3DUP<bits<4> op7_4, string Dt> 1537 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1538 (ins addrmode6dup:$Rn), IIC_VLD3dup, 1539 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>, 1540 Sched<[WriteVLD2]> { 1541 let Rm = 0b1111; 1542 let Inst{4} = 0; 1543 let DecoderMethod = "DecodeVLD3DupInstruction"; 1544} 1545 1546def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; 1547def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; 1548def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; 1549 1550def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1551def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1552def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1553 1554// ...with double-spaced registers (not used for codegen): 1555def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; 1556def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; 1557def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; 1558 1559def VLD3DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1560def VLD3DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1561def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1562def VLD3DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1563def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1564def VLD3DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1565 1566// ...with address register writeback: 1567class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> 1568 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1569 (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, 1570 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", 1571 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 1572 let Inst{4} = 0; 1573 let DecoderMethod = "DecodeVLD3DupInstruction"; 1574} 1575 1576def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; 1577def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; 1578def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; 1579 1580def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; 1581def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; 1582def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; 1583 1584def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>; 1585def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>; 1586def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>; 1587 1588// VLD4DUP : Vector Load (single 4-element structure to all lanes) 1589class VLD4DUP<bits<4> op7_4, string Dt> 1590 : NLdSt<1, 0b10, 0b1111, op7_4, 1591 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1592 (ins addrmode6dup:$Rn), IIC_VLD4dup, 1593 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { 1594 let Rm = 0b1111; 1595 let Inst{4} = Rn{4}; 1596 let DecoderMethod = "DecodeVLD4DupInstruction"; 1597} 1598 1599def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; 1600def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; 1601def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1602 1603def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1604def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1605def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1606 1607// ...with double-spaced registers (not used for codegen): 1608def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; 1609def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; 1610def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1611 1612def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1613def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1614def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1615def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1616def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1617def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1618 1619// ...with address register writeback: 1620class VLD4DUPWB<bits<4> op7_4, string Dt> 1621 : NLdSt<1, 0b10, 0b1111, op7_4, 1622 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1623 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, 1624 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", 1625 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 1626 let Inst{4} = Rn{4}; 1627 let DecoderMethod = "DecodeVLD4DupInstruction"; 1628} 1629 1630def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; 1631def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; 1632def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1633 1634def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; 1635def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; 1636def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1637 1638def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1639def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1640def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1641 1642} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1643 1644let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 1645 1646// Classes for VST* pseudo-instructions with multi-register operands. 1647// These are expanded to real instructions after register allocation. 1648class VSTQPseudo<InstrItinClass itin> 1649 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1650class VSTQWBPseudo<InstrItinClass itin> 1651 : PseudoNLdSt<(outs GPR:$wb), 1652 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1653 "$addr.addr = $wb">; 1654class VSTQWBfixedPseudo<InstrItinClass itin> 1655 : PseudoNLdSt<(outs GPR:$wb), 1656 (ins addrmode6:$addr, QPR:$src), itin, 1657 "$addr.addr = $wb">; 1658class VSTQWBregisterPseudo<InstrItinClass itin> 1659 : PseudoNLdSt<(outs GPR:$wb), 1660 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1661 "$addr.addr = $wb">; 1662class VSTQQPseudo<InstrItinClass itin> 1663 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1664class VSTQQWBPseudo<InstrItinClass itin> 1665 : PseudoNLdSt<(outs GPR:$wb), 1666 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1667 "$addr.addr = $wb">; 1668class VSTQQWBfixedPseudo<InstrItinClass itin> 1669 : PseudoNLdSt<(outs GPR:$wb), 1670 (ins addrmode6:$addr, QQPR:$src), itin, 1671 "$addr.addr = $wb">; 1672class VSTQQWBregisterPseudo<InstrItinClass itin> 1673 : PseudoNLdSt<(outs GPR:$wb), 1674 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1675 "$addr.addr = $wb">; 1676 1677class VSTQQQQPseudo<InstrItinClass itin> 1678 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1679class VSTQQQQWBPseudo<InstrItinClass itin> 1680 : PseudoNLdSt<(outs GPR:$wb), 1681 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 1682 "$addr.addr = $wb">; 1683 1684// VST1 : Vector Store (multiple single elements) 1685class VST1D<bits<4> op7_4, string Dt, Operand AddrMode> 1686 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), 1687 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { 1688 let Rm = 0b1111; 1689 let Inst{4} = Rn{4}; 1690 let DecoderMethod = "DecodeVLDST1Instruction"; 1691} 1692class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode> 1693 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), 1694 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { 1695 let Rm = 0b1111; 1696 let Inst{5-4} = Rn{5-4}; 1697 let DecoderMethod = "DecodeVLDST1Instruction"; 1698} 1699 1700def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; 1701def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; 1702def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; 1703def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; 1704 1705def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; 1706def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; 1707def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; 1708def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; 1709 1710// ...with address register writeback: 1711multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1712 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), 1713 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, 1714 "vst1", Dt, "$Vd, $Rn!", 1715 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { 1716 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1717 let Inst{4} = Rn{4}; 1718 let DecoderMethod = "DecodeVLDST1Instruction"; 1719 } 1720 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), 1721 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), 1722 IIC_VLD1u, 1723 "vst1", Dt, "$Vd, $Rn, $Rm", 1724 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { 1725 let Inst{4} = Rn{4}; 1726 let DecoderMethod = "DecodeVLDST1Instruction"; 1727 } 1728} 1729multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1730 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1731 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, 1732 "vst1", Dt, "$Vd, $Rn!", 1733 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1734 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1735 let Inst{5-4} = Rn{5-4}; 1736 let DecoderMethod = "DecodeVLDST1Instruction"; 1737 } 1738 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1739 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), 1740 IIC_VLD1x2u, 1741 "vst1", Dt, "$Vd, $Rn, $Rm", 1742 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1743 let Inst{5-4} = Rn{5-4}; 1744 let DecoderMethod = "DecodeVLDST1Instruction"; 1745 } 1746} 1747 1748defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; 1749defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; 1750defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; 1751defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; 1752 1753defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 1754defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 1755defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 1756defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 1757 1758// ...with 3 registers 1759class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode> 1760 : NLdSt<0, 0b00, 0b0110, op7_4, (outs), 1761 (ins AddrMode:$Rn, VecListThreeD:$Vd), 1762 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { 1763 let Rm = 0b1111; 1764 let Inst{4} = Rn{4}; 1765 let DecoderMethod = "DecodeVLDST1Instruction"; 1766} 1767multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1768 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1769 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, 1770 "vst1", Dt, "$Vd, $Rn!", 1771 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 1772 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1773 let Inst{5-4} = Rn{5-4}; 1774 let DecoderMethod = "DecodeVLDST1Instruction"; 1775 } 1776 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1777 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), 1778 IIC_VLD1x3u, 1779 "vst1", Dt, "$Vd, $Rn, $Rm", 1780 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 1781 let Inst{5-4} = Rn{5-4}; 1782 let DecoderMethod = "DecodeVLDST1Instruction"; 1783 } 1784} 1785 1786def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; 1787def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; 1788def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; 1789def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; 1790 1791defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; 1792defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; 1793defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; 1794defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; 1795 1796def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1797def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1798def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1799def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1800def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>; 1801def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>; 1802 1803def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1804def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1805def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1806def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1807def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1808def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1809def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1810def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1811 1812// ...with 4 registers 1813class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode> 1814 : NLdSt<0, 0b00, 0b0010, op7_4, (outs), 1815 (ins AddrMode:$Rn, VecListFourD:$Vd), 1816 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", 1817 []>, Sched<[WriteVST4]> { 1818 let Rm = 0b1111; 1819 let Inst{5-4} = Rn{5-4}; 1820 let DecoderMethod = "DecodeVLDST1Instruction"; 1821} 1822multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1823 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1824 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, 1825 "vst1", Dt, "$Vd, $Rn!", 1826 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1827 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1828 let Inst{5-4} = Rn{5-4}; 1829 let DecoderMethod = "DecodeVLDST1Instruction"; 1830 } 1831 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1832 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1833 IIC_VLD1x4u, 1834 "vst1", Dt, "$Vd, $Rn, $Rm", 1835 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1836 let Inst{5-4} = Rn{5-4}; 1837 let DecoderMethod = "DecodeVLDST1Instruction"; 1838 } 1839} 1840 1841def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 1842def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 1843def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 1844def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 1845 1846defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1847defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1848defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1849defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 1850 1851def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1852def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1853def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1854def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1855def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>; 1856def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>; 1857 1858def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1859def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1860def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1861def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1862def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1863def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1864def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1865def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1866 1867// VST2 : Vector Store (multiple 2-element structures) 1868class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 1869 InstrItinClass itin, Operand AddrMode> 1870 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), 1871 itin, "vst2", Dt, "$Vd, $Rn", "", []> { 1872 let Rm = 0b1111; 1873 let Inst{5-4} = Rn{5-4}; 1874 let DecoderMethod = "DecodeVLDST2Instruction"; 1875} 1876 1877def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, 1878 addrmode6align64or128>, Sched<[WriteVST2]>; 1879def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, 1880 addrmode6align64or128>, Sched<[WriteVST2]>; 1881def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, 1882 addrmode6align64or128>, Sched<[WriteVST2]>; 1883 1884def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, 1885 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1886def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, 1887 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1888def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, 1889 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1890 1891def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1892def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1893def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1894 1895// ...with address register writeback: 1896multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, 1897 RegisterOperand VdTy, Operand AddrMode> { 1898 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1899 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, 1900 "vst2", Dt, "$Vd, $Rn!", 1901 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1902 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1903 let Inst{5-4} = Rn{5-4}; 1904 let DecoderMethod = "DecodeVLDST2Instruction"; 1905 } 1906 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1907 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, 1908 "vst2", Dt, "$Vd, $Rn, $Rm", 1909 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1910 let Inst{5-4} = Rn{5-4}; 1911 let DecoderMethod = "DecodeVLDST2Instruction"; 1912 } 1913} 1914multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1915 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1916 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, 1917 "vst2", Dt, "$Vd, $Rn!", 1918 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1919 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1920 let Inst{5-4} = Rn{5-4}; 1921 let DecoderMethod = "DecodeVLDST2Instruction"; 1922 } 1923 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1924 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1925 IIC_VLD1u, 1926 "vst2", Dt, "$Vd, $Rn, $Rm", 1927 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1928 let Inst{5-4} = Rn{5-4}; 1929 let DecoderMethod = "DecodeVLDST2Instruction"; 1930 } 1931} 1932 1933defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, 1934 addrmode6align64or128>; 1935defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, 1936 addrmode6align64or128>; 1937defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, 1938 addrmode6align64or128>; 1939 1940defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1941defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1942defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1943 1944def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1945def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1946def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1947def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1948def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1949def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1950 1951// ...with double-spaced registers 1952def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, 1953 addrmode6align64or128>; 1954def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, 1955 addrmode6align64or128>; 1956def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, 1957 addrmode6align64or128>; 1958defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, 1959 addrmode6align64or128>; 1960defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, 1961 addrmode6align64or128>; 1962defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, 1963 addrmode6align64or128>; 1964 1965// VST3 : Vector Store (multiple 3-element structures) 1966class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 1967 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1968 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, 1969 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { 1970 let Rm = 0b1111; 1971 let Inst{4} = Rn{4}; 1972 let DecoderMethod = "DecodeVLDST3Instruction"; 1973} 1974 1975def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; 1976def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; 1977def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; 1978 1979def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1980def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1981def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1982 1983// ...with address register writeback: 1984class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1985 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1986 (ins addrmode6:$Rn, am6offset:$Rm, 1987 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, 1988 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", 1989 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 1990 let Inst{4} = Rn{4}; 1991 let DecoderMethod = "DecodeVLDST3Instruction"; 1992} 1993 1994def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; 1995def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; 1996def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; 1997 1998def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 1999def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2000def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2001 2002// ...with double-spaced registers: 2003def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; 2004def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; 2005def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; 2006def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; 2007def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; 2008def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; 2009 2010def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2011def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2012def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2013 2014// ...alternate versions to be allocated odd register numbers: 2015def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2016def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2017def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2018 2019def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2020def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2021def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2022 2023// VST4 : Vector Store (multiple 4-element structures) 2024class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 2025 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 2026 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), 2027 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", 2028 "", []>, Sched<[WriteVST4]> { 2029 let Rm = 0b1111; 2030 let Inst{5-4} = Rn{5-4}; 2031 let DecoderMethod = "DecodeVLDST4Instruction"; 2032} 2033 2034def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; 2035def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; 2036def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; 2037 2038def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2039def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2040def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2041 2042// ...with address register writeback: 2043class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2044 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 2045 (ins addrmode6:$Rn, am6offset:$Rm, 2046 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, 2047 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", 2048 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 2049 let Inst{5-4} = Rn{5-4}; 2050 let DecoderMethod = "DecodeVLDST4Instruction"; 2051} 2052 2053def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; 2054def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; 2055def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; 2056 2057def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2058def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2059def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2060 2061// ...with double-spaced registers: 2062def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; 2063def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; 2064def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; 2065def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; 2066def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; 2067def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; 2068 2069def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2070def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2071def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2072 2073// ...alternate versions to be allocated odd register numbers: 2074def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2075def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2076def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2077 2078def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2079def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2080def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2081 2082} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2083 2084// Classes for VST*LN pseudo-instructions with multi-register operands. 2085// These are expanded to real instructions after register allocation. 2086class VSTQLNPseudo<InstrItinClass itin> 2087 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 2088 itin, "">; 2089class VSTQLNWBPseudo<InstrItinClass itin> 2090 : PseudoNLdSt<(outs GPR:$wb), 2091 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 2092 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2093class VSTQQLNPseudo<InstrItinClass itin> 2094 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 2095 itin, "">; 2096class VSTQQLNWBPseudo<InstrItinClass itin> 2097 : PseudoNLdSt<(outs GPR:$wb), 2098 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 2099 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2100class VSTQQQQLNPseudo<InstrItinClass itin> 2101 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 2102 itin, "">; 2103class VSTQQQQLNWBPseudo<InstrItinClass itin> 2104 : PseudoNLdSt<(outs GPR:$wb), 2105 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 2106 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2107 2108// VST1LN : Vector Store (single element from one lane) 2109class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2110 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> 2111 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2112 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), 2113 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", 2114 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, 2115 Sched<[WriteVST1]> { 2116 let Rm = 0b1111; 2117 let DecoderMethod = "DecodeVST1LN"; 2118} 2119class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 2120 : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> { 2121 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2122 addrmode6:$addr)]; 2123} 2124 2125def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, 2126 ARMvgetlaneu, addrmode6> { 2127 let Inst{7-5} = lane{2-0}; 2128} 2129def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, 2130 ARMvgetlaneu, addrmode6> { 2131 let Inst{7-6} = lane{1-0}; 2132 let Inst{4} = Rn{4}; 2133} 2134 2135def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, 2136 addrmode6oneL32> { 2137 let Inst{7} = lane{0}; 2138 let Inst{5-4} = Rn{5-4}; 2139} 2140 2141def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>; 2142def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>; 2143def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>; 2144 2145let Predicates = [HasNEON] in { 2146def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), 2147 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 2148def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), 2149 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 2150 2151def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr), 2152 (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>; 2153def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr), 2154 (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 2155} 2156 2157// ...with address register writeback: 2158class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2159 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> 2160 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2161 (ins AdrMode:$Rn, am6offset:$Rm, 2162 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, 2163 "\\{$Vd[$lane]\\}, $Rn$Rm", 2164 "$Rn.addr = $wb", 2165 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), 2166 AdrMode:$Rn, am6offset:$Rm))]>, 2167 Sched<[WriteVST1]> { 2168 let DecoderMethod = "DecodeVST1LN"; 2169} 2170class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 2171 : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> { 2172 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2173 addrmode6:$addr, am6offset:$offset))]; 2174} 2175 2176def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, 2177 ARMvgetlaneu, addrmode6> { 2178 let Inst{7-5} = lane{2-0}; 2179} 2180def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, 2181 ARMvgetlaneu, addrmode6> { 2182 let Inst{7-6} = lane{1-0}; 2183 let Inst{4} = Rn{4}; 2184} 2185def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, 2186 extractelt, addrmode6oneL32> { 2187 let Inst{7} = lane{0}; 2188 let Inst{5-4} = Rn{5-4}; 2189} 2190 2191def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>; 2192def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>; 2193def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>; 2194 2195let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 2196 2197// VST2LN : Vector Store (single 2-element structure from one lane) 2198class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2199 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2200 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), 2201 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", 2202 "", []>, Sched<[WriteVST1]> { 2203 let Rm = 0b1111; 2204 let Inst{4} = Rn{4}; 2205 let DecoderMethod = "DecodeVST2LN"; 2206} 2207 2208def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { 2209 let Inst{7-5} = lane{2-0}; 2210} 2211def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { 2212 let Inst{7-6} = lane{1-0}; 2213} 2214def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { 2215 let Inst{7} = lane{0}; 2216} 2217 2218def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2219def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2220def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2221 2222// ...with double-spaced registers: 2223def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { 2224 let Inst{7-6} = lane{1-0}; 2225 let Inst{4} = Rn{4}; 2226} 2227def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { 2228 let Inst{7} = lane{0}; 2229 let Inst{4} = Rn{4}; 2230} 2231 2232def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2233def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>; 2234 2235// ...with address register writeback: 2236class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2237 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2238 (ins addrmode6:$Rn, am6offset:$Rm, 2239 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, 2240 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", 2241 "$Rn.addr = $wb", []> { 2242 let Inst{4} = Rn{4}; 2243 let DecoderMethod = "DecodeVST2LN"; 2244} 2245 2246def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { 2247 let Inst{7-5} = lane{2-0}; 2248} 2249def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { 2250 let Inst{7-6} = lane{1-0}; 2251} 2252def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { 2253 let Inst{7} = lane{0}; 2254} 2255 2256def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2257def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2258def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2259 2260def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { 2261 let Inst{7-6} = lane{1-0}; 2262} 2263def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { 2264 let Inst{7} = lane{0}; 2265} 2266 2267def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2268def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>; 2269 2270// VST3LN : Vector Store (single 3-element structure from one lane) 2271class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2272 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2273 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, 2274 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, 2275 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>, 2276 Sched<[WriteVST2]> { 2277 let Rm = 0b1111; 2278 let DecoderMethod = "DecodeVST3LN"; 2279} 2280 2281def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { 2282 let Inst{7-5} = lane{2-0}; 2283} 2284def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { 2285 let Inst{7-6} = lane{1-0}; 2286} 2287def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2288 let Inst{7} = lane{0}; 2289} 2290 2291def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2292def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2293def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2294 2295// ...with double-spaced registers: 2296def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2297 let Inst{7-6} = lane{1-0}; 2298} 2299def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2300 let Inst{7} = lane{0}; 2301} 2302 2303def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2304def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2305 2306// ...with address register writeback: 2307class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2308 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2309 (ins addrmode6:$Rn, am6offset:$Rm, 2310 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2311 IIC_VST3lnu, "vst3", Dt, 2312 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2313 "$Rn.addr = $wb", []> { 2314 let DecoderMethod = "DecodeVST3LN"; 2315} 2316 2317def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2318 let Inst{7-5} = lane{2-0}; 2319} 2320def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2321 let Inst{7-6} = lane{1-0}; 2322} 2323def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2324 let Inst{7} = lane{0}; 2325} 2326 2327def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2328def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2329def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2330 2331def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2332 let Inst{7-6} = lane{1-0}; 2333} 2334def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2335 let Inst{7} = lane{0}; 2336} 2337 2338def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2339def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2340 2341// VST4LN : Vector Store (single 4-element structure from one lane) 2342class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2343 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2344 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2345 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2346 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2347 "", []>, Sched<[WriteVST2]> { 2348 let Rm = 0b1111; 2349 let Inst{4} = Rn{4}; 2350 let DecoderMethod = "DecodeVST4LN"; 2351} 2352 2353def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2354 let Inst{7-5} = lane{2-0}; 2355} 2356def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2357 let Inst{7-6} = lane{1-0}; 2358} 2359def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2360 let Inst{7} = lane{0}; 2361 let Inst{5} = Rn{5}; 2362} 2363 2364def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2365def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2366def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2367 2368// ...with double-spaced registers: 2369def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2370 let Inst{7-6} = lane{1-0}; 2371} 2372def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2373 let Inst{7} = lane{0}; 2374 let Inst{5} = Rn{5}; 2375} 2376 2377def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2378def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2379 2380// ...with address register writeback: 2381class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2382 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2383 (ins addrmode6:$Rn, am6offset:$Rm, 2384 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2385 IIC_VST4lnu, "vst4", Dt, 2386 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2387 "$Rn.addr = $wb", []> { 2388 let Inst{4} = Rn{4}; 2389 let DecoderMethod = "DecodeVST4LN"; 2390} 2391 2392def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2393 let Inst{7-5} = lane{2-0}; 2394} 2395def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2396 let Inst{7-6} = lane{1-0}; 2397} 2398def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2399 let Inst{7} = lane{0}; 2400 let Inst{5} = Rn{5}; 2401} 2402 2403def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2404def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2405def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2406 2407def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2408 let Inst{7-6} = lane{1-0}; 2409} 2410def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2411 let Inst{7} = lane{0}; 2412 let Inst{5} = Rn{5}; 2413} 2414 2415def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2416def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2417 2418} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2419 2420// Use vld1/vst1 for unaligned f64 load / store 2421let Predicates = [IsLE,HasNEON] in { 2422def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2423 (VLD1d16 addrmode6:$addr)>; 2424def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2425 (VST1d16 addrmode6:$addr, DPR:$value)>; 2426def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2427 (VLD1d8 addrmode6:$addr)>; 2428def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2429 (VST1d8 addrmode6:$addr, DPR:$value)>; 2430} 2431let Predicates = [IsBE,HasNEON] in { 2432def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2433 (VLD1d64 addrmode6:$addr)>; 2434def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2435 (VST1d64 addrmode6:$addr, DPR:$value)>; 2436} 2437 2438// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2439// load / store if it's legal. 2440let Predicates = [HasNEON] in { 2441def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), 2442 (VLD1q64 addrmode6:$addr)>; 2443def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2444 (VST1q64 addrmode6:$addr, QPR:$value)>; 2445} 2446let Predicates = [IsLE,HasNEON] in { 2447def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), 2448 (VLD1q32 addrmode6:$addr)>; 2449def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2450 (VST1q32 addrmode6:$addr, QPR:$value)>; 2451def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 2452 (VLD1q16 addrmode6:$addr)>; 2453def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2454 (VST1q16 addrmode6:$addr, QPR:$value)>; 2455def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 2456 (VLD1q8 addrmode6:$addr)>; 2457def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2458 (VST1q8 addrmode6:$addr, QPR:$value)>; 2459} 2460 2461//===----------------------------------------------------------------------===// 2462// NEON pattern fragments 2463//===----------------------------------------------------------------------===// 2464 2465// Extract D sub-registers of Q registers. 2466def DSubReg_i8_reg : SDNodeXForm<imm, [{ 2467 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2468 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N), 2469 MVT::i32); 2470}]>; 2471def DSubReg_i16_reg : SDNodeXForm<imm, [{ 2472 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2473 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N), 2474 MVT::i32); 2475}]>; 2476def DSubReg_i32_reg : SDNodeXForm<imm, [{ 2477 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2478 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N), 2479 MVT::i32); 2480}]>; 2481def DSubReg_f64_reg : SDNodeXForm<imm, [{ 2482 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2483 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N), 2484 MVT::i32); 2485}]>; 2486 2487// Extract S sub-registers of Q/D registers. 2488def SSubReg_f32_reg : SDNodeXForm<imm, [{ 2489 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); 2490 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N), 2491 MVT::i32); 2492}]>; 2493 2494// Extract S sub-registers of Q/D registers containing a given f16 lane. 2495def SSubReg_f16_reg : SDNodeXForm<imm, [{ 2496 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); 2497 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N), 2498 MVT::i32); 2499}]>; 2500 2501// Translate lane numbers from Q registers to D subregs. 2502def SubReg_i8_lane : SDNodeXForm<imm, [{ 2503 return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32); 2504}]>; 2505def SubReg_i16_lane : SDNodeXForm<imm, [{ 2506 return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32); 2507}]>; 2508def SubReg_i32_lane : SDNodeXForm<imm, [{ 2509 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32); 2510}]>; 2511 2512//===----------------------------------------------------------------------===// 2513// Instruction Classes 2514//===----------------------------------------------------------------------===// 2515 2516// Basic 2-register operations: double- and quad-register. 2517class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2518 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2519 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2520 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2521 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "", 2522 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>; 2523class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2524 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2525 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2526 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2527 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "", 2528 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>; 2529 2530// Basic 2-register intrinsics, both double- and quad-register. 2531class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2532 bits<2> op17_16, bits<5> op11_7, bit op4, 2533 InstrItinClass itin, string OpcodeStr, string Dt, 2534 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2535 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2536 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2537 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2538class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2539 bits<2> op17_16, bits<5> op11_7, bit op4, 2540 InstrItinClass itin, string OpcodeStr, string Dt, 2541 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2542 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2543 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2544 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2545 2546// Same as above, but not predicated. 2547class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2548 InstrItinClass itin, string OpcodeStr, string Dt, 2549 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2550 : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), 2551 itin, OpcodeStr, Dt, 2552 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2553 2554class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2555 InstrItinClass itin, string OpcodeStr, string Dt, 2556 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2557 : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), 2558 itin, OpcodeStr, Dt, 2559 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2560 2561// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). 2562class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2563 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2564 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2565 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), 2566 itin, OpcodeStr, Dt, 2567 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2568 2569// Same as N2VQIntXnp but with Vd as a src register. 2570class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2571 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2572 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2573 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, 2574 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), 2575 itin, OpcodeStr, Dt, 2576 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { 2577 let Constraints = "$src = $Vd"; 2578} 2579 2580// Narrow 2-register operations. 2581class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2582 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2583 InstrItinClass itin, string OpcodeStr, string Dt, 2584 ValueType TyD, ValueType TyQ, SDNode OpNode> 2585 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2586 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2587 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>; 2588 2589// Narrow 2-register intrinsics. 2590class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2591 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2592 InstrItinClass itin, string OpcodeStr, string Dt, 2593 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> 2594 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2595 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2596 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>; 2597 2598// Long 2-register operations (currently only used for VMOVL). 2599class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2600 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2601 InstrItinClass itin, string OpcodeStr, string Dt, 2602 ValueType TyQ, ValueType TyD, SDNode OpNode> 2603 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2604 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2605 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>; 2606 2607// Long 2-register intrinsics. 2608class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2609 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2610 InstrItinClass itin, string OpcodeStr, string Dt, 2611 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 2612 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2613 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2614 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>; 2615 2616// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 2617class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> 2618 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm), 2619 (ins DPR:$src1, DPR:$src2), IIC_VPERMD, 2620 OpcodeStr, Dt, "$Vd, $Vm", 2621 "$src1 = $Vd, $src2 = $Vm", []>; 2622class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, 2623 InstrItinClass itin, string OpcodeStr, string Dt> 2624 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm), 2625 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm", 2626 "$src1 = $Vd, $src2 = $Vm", []>; 2627 2628// Basic 3-register operations: double- and quad-register. 2629class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2630 InstrItinClass itin, string OpcodeStr, string Dt, 2631 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> 2632 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2633 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2634 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2635 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { 2636 // All of these have a two-operand InstAlias. 2637 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2638 let isCommutable = Commutable; 2639} 2640// Same as N3VD but no data type. 2641class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2642 InstrItinClass itin, string OpcodeStr, 2643 ValueType ResTy, ValueType OpTy, 2644 SDNode OpNode, bit Commutable> 2645 : N3VX<op24, op23, op21_20, op11_8, 0, op4, 2646 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2647 OpcodeStr, "$Vd, $Vn, $Vm", "", 2648 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{ 2649 // All of these have a two-operand InstAlias. 2650 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2651 let isCommutable = Commutable; 2652} 2653 2654class N3VDSL<bits<2> op21_20, bits<4> op11_8, 2655 InstrItinClass itin, string OpcodeStr, string Dt, 2656 ValueType Ty, SDNode ShOp> 2657 : N3VLane32<0, 1, op21_20, op11_8, 1, 0, 2658 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2659 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2660 [(set (Ty DPR:$Vd), 2661 (Ty (ShOp (Ty DPR:$Vn), 2662 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { 2663 // All of these have a two-operand InstAlias. 2664 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2665 let isCommutable = 0; 2666} 2667class N3VDSL16<bits<2> op21_20, bits<4> op11_8, 2668 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> 2669 : N3VLane16<0, 1, op21_20, op11_8, 1, 0, 2670 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2671 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","", 2672 [(set (Ty DPR:$Vd), 2673 (Ty (ShOp (Ty DPR:$Vn), 2674 (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { 2675 // All of these have a two-operand InstAlias. 2676 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2677 let isCommutable = 0; 2678} 2679 2680class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2681 InstrItinClass itin, string OpcodeStr, string Dt, 2682 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> 2683 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2684 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2685 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2686 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { 2687 // All of these have a two-operand InstAlias. 2688 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2689 let isCommutable = Commutable; 2690} 2691class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2692 InstrItinClass itin, string OpcodeStr, 2693 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> 2694 : N3VX<op24, op23, op21_20, op11_8, 1, op4, 2695 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2696 OpcodeStr, "$Vd, $Vn, $Vm", "", 2697 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{ 2698 // All of these have a two-operand InstAlias. 2699 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2700 let isCommutable = Commutable; 2701} 2702class N3VQSL<bits<2> op21_20, bits<4> op11_8, 2703 InstrItinClass itin, string OpcodeStr, string Dt, 2704 ValueType ResTy, ValueType OpTy, SDNode ShOp> 2705 : N3VLane32<1, 1, op21_20, op11_8, 1, 0, 2706 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2707 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2708 [(set (ResTy QPR:$Vd), 2709 (ResTy (ShOp (ResTy QPR:$Vn), 2710 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm), 2711 imm:$lane)))))]> { 2712 // All of these have a two-operand InstAlias. 2713 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2714 let isCommutable = 0; 2715} 2716class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, 2717 ValueType ResTy, ValueType OpTy, SDNode ShOp> 2718 : N3VLane16<1, 1, op21_20, op11_8, 1, 0, 2719 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2720 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "", 2721 [(set (ResTy QPR:$Vd), 2722 (ResTy (ShOp (ResTy QPR:$Vn), 2723 (ResTy (ARMvduplane (OpTy DPR_8:$Vm), 2724 imm:$lane)))))]> { 2725 // All of these have a two-operand InstAlias. 2726 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2727 let isCommutable = 0; 2728} 2729 2730// Basic 3-register intrinsics, both double- and quad-register. 2731class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2732 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2733 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> 2734 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2735 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, 2736 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2737 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { 2738 // All of these have a two-operand InstAlias. 2739 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2740 let isCommutable = Commutable; 2741} 2742 2743class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 2744 bit op4, Format f, InstrItinClass itin, string OpcodeStr, 2745 string Dt, ValueType ResTy, ValueType OpTy, 2746 SDPatternOperator IntOp, bit Commutable> 2747 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 2748 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, 2749 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; 2750 2751class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2752 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> 2753 : N3VLane32<0, 1, op21_20, op11_8, 1, 0, 2754 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2755 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2756 [(set (Ty DPR:$Vd), 2757 (Ty (IntOp (Ty DPR:$Vn), 2758 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm), 2759 imm:$lane)))))]> { 2760 let isCommutable = 0; 2761} 2762 2763class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2764 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> 2765 : N3VLane16<0, 1, op21_20, op11_8, 1, 0, 2766 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2767 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2768 [(set (Ty DPR:$Vd), 2769 (Ty (IntOp (Ty DPR:$Vn), 2770 (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { 2771 let isCommutable = 0; 2772} 2773class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2774 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2775 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2776 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2777 (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin, 2778 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "", 2779 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> { 2780 let TwoOperandAliasConstraint = "$Vm = $Vd"; 2781 let isCommutable = 0; 2782} 2783 2784class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2785 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2786 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> 2787 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2788 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, 2789 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2790 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { 2791 // All of these have a two-operand InstAlias. 2792 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2793 let isCommutable = Commutable; 2794} 2795 2796class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 2797 bit op4, Format f, InstrItinClass itin, string OpcodeStr, 2798 string Dt, ValueType ResTy, ValueType OpTy, 2799 SDPatternOperator IntOp, bit Commutable> 2800 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 2801 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, 2802 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; 2803 2804// Same as N3VQIntnp but with Vd as a src register. 2805class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 2806 bit op4, Format f, InstrItinClass itin, string OpcodeStr, 2807 string Dt, ValueType ResTy, ValueType OpTy, 2808 SDPatternOperator IntOp, bit Commutable> 2809 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 2810 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), 2811 f, itin, OpcodeStr, Dt, 2812 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn), 2813 (OpTy QPR:$Vm))))]> { 2814 let Constraints = "$src = $Vd"; 2815} 2816 2817class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2818 string OpcodeStr, string Dt, 2819 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2820 : N3VLane32<1, 1, op21_20, op11_8, 1, 0, 2821 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2822 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2823 [(set (ResTy QPR:$Vd), 2824 (ResTy (IntOp (ResTy QPR:$Vn), 2825 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm), 2826 imm:$lane)))))]> { 2827 let isCommutable = 0; 2828} 2829class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2830 string OpcodeStr, string Dt, 2831 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2832 : N3VLane16<1, 1, op21_20, op11_8, 1, 0, 2833 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2834 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2835 [(set (ResTy QPR:$Vd), 2836 (ResTy (IntOp (ResTy QPR:$Vn), 2837 (ResTy (ARMvduplane (OpTy DPR_8:$Vm), 2838 imm:$lane)))))]> { 2839 let isCommutable = 0; 2840} 2841class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2842 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2843 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2844 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2845 (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin, 2846 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "", 2847 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> { 2848 let TwoOperandAliasConstraint = "$Vm = $Vd"; 2849 let isCommutable = 0; 2850} 2851 2852// Multiply-Add/Sub operations: double- and quad-register. 2853class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2854 InstrItinClass itin, string OpcodeStr, string Dt, 2855 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode> 2856 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2857 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2858 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2859 [(set DPR:$Vd, (Ty (OpNode DPR:$src1, 2860 (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>; 2861 2862class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2863 string OpcodeStr, string Dt, 2864 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> 2865 : N3VLane32<0, 1, op21_20, op11_8, 1, 0, 2866 (outs DPR:$Vd), 2867 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2868 NVMulSLFrm, itin, 2869 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2870 [(set (Ty DPR:$Vd), 2871 (Ty (ShOp (Ty DPR:$src1), 2872 (Ty (MulOp DPR:$Vn, 2873 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm), 2874 imm:$lane)))))))]>; 2875class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2876 string OpcodeStr, string Dt, 2877 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> 2878 : N3VLane16<0, 1, op21_20, op11_8, 1, 0, 2879 (outs DPR:$Vd), 2880 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2881 NVMulSLFrm, itin, 2882 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2883 [(set (Ty DPR:$Vd), 2884 (Ty (ShOp (Ty DPR:$src1), 2885 (Ty (MulOp DPR:$Vn, 2886 (Ty (ARMvduplane (Ty DPR_8:$Vm), 2887 imm:$lane)))))))]>; 2888 2889class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2890 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, 2891 SDPatternOperator MulOp, SDPatternOperator OpNode> 2892 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2893 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2894 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2895 [(set QPR:$Vd, (Ty (OpNode QPR:$src1, 2896 (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>; 2897class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2898 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 2899 SDPatternOperator MulOp, SDPatternOperator ShOp> 2900 : N3VLane32<1, 1, op21_20, op11_8, 1, 0, 2901 (outs QPR:$Vd), 2902 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2903 NVMulSLFrm, itin, 2904 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2905 [(set (ResTy QPR:$Vd), 2906 (ResTy (ShOp (ResTy QPR:$src1), 2907 (ResTy (MulOp QPR:$Vn, 2908 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm), 2909 imm:$lane)))))))]>; 2910class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2911 string OpcodeStr, string Dt, 2912 ValueType ResTy, ValueType OpTy, 2913 SDPatternOperator MulOp, SDPatternOperator ShOp> 2914 : N3VLane16<1, 1, op21_20, op11_8, 1, 0, 2915 (outs QPR:$Vd), 2916 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2917 NVMulSLFrm, itin, 2918 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2919 [(set (ResTy QPR:$Vd), 2920 (ResTy (ShOp (ResTy QPR:$src1), 2921 (ResTy (MulOp QPR:$Vn, 2922 (ResTy (ARMvduplane (OpTy DPR_8:$Vm), 2923 imm:$lane)))))))]>; 2924 2925// Neon Intrinsic-Op instructions (VABA): double- and quad-register. 2926class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2927 InstrItinClass itin, string OpcodeStr, string Dt, 2928 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2929 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2930 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2931 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2932 [(set DPR:$Vd, (Ty (OpNode DPR:$src1, 2933 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; 2934class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2935 InstrItinClass itin, string OpcodeStr, string Dt, 2936 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2937 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2938 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2939 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2940 [(set QPR:$Vd, (Ty (OpNode QPR:$src1, 2941 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>; 2942 2943// Neon 3-argument intrinsics, both double- and quad-register. 2944// The destination register is also used as the first source operand register. 2945class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2946 InstrItinClass itin, string OpcodeStr, string Dt, 2947 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2948 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2949 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2950 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2951 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1), 2952 (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; 2953class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2954 InstrItinClass itin, string OpcodeStr, string Dt, 2955 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2956 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2957 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2958 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2959 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1), 2960 (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; 2961 2962// Long Multiply-Add/Sub operations. 2963class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2964 InstrItinClass itin, string OpcodeStr, string Dt, 2965 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2966 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2967 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2968 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2969 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1), 2970 (TyQ (MulOp (TyD DPR:$Vn), 2971 (TyD DPR:$Vm)))))]>; 2972class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, 2973 InstrItinClass itin, string OpcodeStr, string Dt, 2974 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2975 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), 2976 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2977 NVMulSLFrm, itin, 2978 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2979 [(set QPR:$Vd, 2980 (OpNode (TyQ QPR:$src1), 2981 (TyQ (MulOp (TyD DPR:$Vn), 2982 (TyD (ARMvduplane (TyD DPR_VFP2:$Vm), 2983 imm:$lane))))))]>; 2984class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, 2985 InstrItinClass itin, string OpcodeStr, string Dt, 2986 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2987 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), 2988 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2989 NVMulSLFrm, itin, 2990 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2991 [(set QPR:$Vd, 2992 (OpNode (TyQ QPR:$src1), 2993 (TyQ (MulOp (TyD DPR:$Vn), 2994 (TyD (ARMvduplane (TyD DPR_8:$Vm), 2995 imm:$lane))))))]>; 2996 2997// Long Intrinsic-Op vector operations with explicit extend (VABAL). 2998class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2999 InstrItinClass itin, string OpcodeStr, string Dt, 3000 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, 3001 SDNode OpNode> 3002 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3003 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3004 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 3005 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1), 3006 (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn), 3007 (TyD DPR:$Vm)))))))]>; 3008 3009// Neon Long 3-argument intrinsic. The destination register is 3010// a quad-register and is also used as the first source operand register. 3011class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3012 InstrItinClass itin, string OpcodeStr, string Dt, 3013 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 3014 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3015 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3016 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 3017 [(set QPR:$Vd, 3018 (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>; 3019class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 3020 string OpcodeStr, string Dt, 3021 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3022 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, 3023 (outs QPR:$Vd), 3024 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 3025 NVMulSLFrm, itin, 3026 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 3027 [(set (ResTy QPR:$Vd), 3028 (ResTy (IntOp (ResTy QPR:$src1), 3029 (OpTy DPR:$Vn), 3030 (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm), 3031 imm:$lane)))))]>; 3032class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, 3033 InstrItinClass itin, string OpcodeStr, string Dt, 3034 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3035 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, 3036 (outs QPR:$Vd), 3037 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 3038 NVMulSLFrm, itin, 3039 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 3040 [(set (ResTy QPR:$Vd), 3041 (ResTy (IntOp (ResTy QPR:$src1), 3042 (OpTy DPR:$Vn), 3043 (OpTy (ARMvduplane (OpTy DPR_8:$Vm), 3044 imm:$lane)))))]>; 3045 3046// Narrowing 3-register intrinsics. 3047class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3048 string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, 3049 SDPatternOperator IntOp, bit Commutable> 3050 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3051 (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D, 3052 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3053 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> { 3054 let isCommutable = Commutable; 3055} 3056 3057// Long 3-register operations. 3058class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3059 InstrItinClass itin, string OpcodeStr, string Dt, 3060 ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> 3061 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3062 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3063 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3064 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { 3065 let isCommutable = Commutable; 3066} 3067 3068class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, 3069 InstrItinClass itin, string OpcodeStr, string Dt, 3070 ValueType TyQ, ValueType TyD, SDNode OpNode> 3071 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, 3072 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 3073 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 3074 [(set QPR:$Vd, 3075 (TyQ (OpNode (TyD DPR:$Vn), 3076 (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>; 3077class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, 3078 InstrItinClass itin, string OpcodeStr, string Dt, 3079 ValueType TyQ, ValueType TyD, SDNode OpNode> 3080 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, 3081 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 3082 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 3083 [(set QPR:$Vd, 3084 (TyQ (OpNode (TyD DPR:$Vn), 3085 (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>; 3086 3087// Long 3-register operations with explicitly extended operands. 3088class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3089 InstrItinClass itin, string OpcodeStr, string Dt, 3090 ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, 3091 bit Commutable> 3092 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3093 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3094 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3095 [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))), 3096 (TyQ (ExtOp (TyD DPR:$Vm)))))]> { 3097 let isCommutable = Commutable; 3098} 3099 3100// Long 3-register intrinsics with explicit extend (VABDL). 3101class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3102 InstrItinClass itin, string OpcodeStr, string Dt, 3103 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, 3104 bit Commutable> 3105 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3106 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3107 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3108 [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn), 3109 (TyD DPR:$Vm))))))]> { 3110 let isCommutable = Commutable; 3111} 3112 3113// Long 3-register intrinsics. 3114class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3115 InstrItinClass itin, string OpcodeStr, string Dt, 3116 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable> 3117 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3118 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3119 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3120 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { 3121 let isCommutable = Commutable; 3122} 3123 3124// Same as above, but not predicated. 3125class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 3126 bit op4, InstrItinClass itin, string OpcodeStr, 3127 string Dt, ValueType ResTy, ValueType OpTy, 3128 SDPatternOperator IntOp, bit Commutable> 3129 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 3130 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, 3131 [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; 3132 3133class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 3134 string OpcodeStr, string Dt, 3135 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3136 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, 3137 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 3138 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 3139 [(set (ResTy QPR:$Vd), 3140 (ResTy (IntOp (OpTy DPR:$Vn), 3141 (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm), 3142 imm:$lane)))))]>; 3143class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, 3144 InstrItinClass itin, string OpcodeStr, string Dt, 3145 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3146 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, 3147 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 3148 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 3149 [(set (ResTy QPR:$Vd), 3150 (ResTy (IntOp (OpTy DPR:$Vn), 3151 (OpTy (ARMvduplane (OpTy DPR_8:$Vm), 3152 imm:$lane)))))]>; 3153 3154// Wide 3-register operations. 3155class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3156 string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, 3157 SDNode OpNode, SDNode ExtOp, bit Commutable> 3158 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3159 (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD, 3160 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3161 [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn), 3162 (TyQ (ExtOp (TyD DPR:$Vm)))))]> { 3163 // All of these have a two-operand InstAlias. 3164 let TwoOperandAliasConstraint = "$Vn = $Vd"; 3165 let isCommutable = Commutable; 3166} 3167 3168// Pairwise long 2-register intrinsics, both double- and quad-register. 3169class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3170 bits<2> op17_16, bits<5> op11_7, bit op4, 3171 string OpcodeStr, string Dt, 3172 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3173 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 3174 (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", 3175 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 3176class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3177 bits<2> op17_16, bits<5> op11_7, bit op4, 3178 string OpcodeStr, string Dt, 3179 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3180 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 3181 (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", 3182 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 3183 3184// Pairwise long 2-register accumulate intrinsics, 3185// both double- and quad-register. 3186// The destination register is also used as the first source operand register. 3187class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3188 bits<2> op17_16, bits<5> op11_7, bit op4, 3189 string OpcodeStr, string Dt, 3190 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3191 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, 3192 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD, 3193 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", 3194 [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>; 3195class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3196 bits<2> op17_16, bits<5> op11_7, bit op4, 3197 string OpcodeStr, string Dt, 3198 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3199 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, 3200 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ, 3201 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", 3202 [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>; 3203 3204// Shift by immediate, 3205// both double- and quad-register. 3206let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3207class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3208 Format f, InstrItinClass itin, Operand ImmTy, 3209 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> 3210 : N2VImm<op24, op23, op11_8, op7, 0, op4, 3211 (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin, 3212 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3213 [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>; 3214class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3215 Format f, InstrItinClass itin, Operand ImmTy, 3216 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> 3217 : N2VImm<op24, op23, op11_8, op7, 1, op4, 3218 (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, 3219 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3220 [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; 3221} 3222 3223// Long shift by immediate. 3224class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, 3225 string OpcodeStr, string Dt, 3226 ValueType ResTy, ValueType OpTy, Operand ImmTy, 3227 SDPatternOperator OpNode> 3228 : N2VImm<op24, op23, op11_8, op7, op6, op4, 3229 (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, 3230 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3231 [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>; 3232 3233// Narrow shift by immediate. 3234class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, 3235 InstrItinClass itin, string OpcodeStr, string Dt, 3236 ValueType ResTy, ValueType OpTy, Operand ImmTy, 3237 SDPatternOperator OpNode> 3238 : N2VImm<op24, op23, op11_8, op7, op6, op4, 3239 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin, 3240 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3241 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm), 3242 (i32 ImmTy:$SIMM))))]>; 3243 3244// Shift right by immediate and accumulate, 3245// both double- and quad-register. 3246let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3247class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3248 Operand ImmTy, string OpcodeStr, string Dt, 3249 ValueType Ty, SDNode ShOp> 3250 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), 3251 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, 3252 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3253 [(set DPR:$Vd, (Ty (add DPR:$src1, 3254 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>; 3255class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3256 Operand ImmTy, string OpcodeStr, string Dt, 3257 ValueType Ty, SDNode ShOp> 3258 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), 3259 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, 3260 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3261 [(set QPR:$Vd, (Ty (add QPR:$src1, 3262 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; 3263} 3264 3265// Shift by immediate and insert, 3266// both double- and quad-register. 3267let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3268class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3269 Operand ImmTy, Format f, string OpcodeStr, string Dt, 3270 ValueType Ty,SDNode ShOp> 3271 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), 3272 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD, 3273 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3274 [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>; 3275class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3276 Operand ImmTy, Format f, string OpcodeStr, string Dt, 3277 ValueType Ty,SDNode ShOp> 3278 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), 3279 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, 3280 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3281 [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; 3282} 3283 3284// Convert, with fractional bits immediate, 3285// both double- and quad-register. 3286class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3287 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 3288 SDPatternOperator IntOp> 3289 : N2VImm<op24, op23, op11_8, op7, 0, op4, 3290 (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, 3291 IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3292 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; 3293class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3294 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 3295 SDPatternOperator IntOp> 3296 : N2VImm<op24, op23, op11_8, op7, 1, op4, 3297 (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, 3298 IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3299 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; 3300 3301//===----------------------------------------------------------------------===// 3302// Multiclasses 3303//===----------------------------------------------------------------------===// 3304 3305// Abbreviations used in multiclass suffixes: 3306// Q = quarter int (8 bit) elements 3307// H = half int (16 bit) elements 3308// S = single int (32 bit) elements 3309// D = double int (64 bit) elements 3310 3311// Neon 2-register vector operations and intrinsics. 3312 3313// Neon 2-register comparisons. 3314// source operand element sizes of 8, 16 and 32 bits: 3315multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3316 bits<5> op11_7, bit op4, string opc, string Dt, 3317 string asm, PatFrag fc> { 3318 // 64-bit vector types. 3319 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, 3320 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3321 opc, !strconcat(Dt, "8"), asm, "", 3322 [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>; 3323 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, 3324 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3325 opc, !strconcat(Dt, "16"), asm, "", 3326 [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>; 3327 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3328 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3329 opc, !strconcat(Dt, "32"), asm, "", 3330 [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>; 3331 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3332 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3333 opc, "f32", asm, "", 3334 [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> { 3335 let Inst{10} = 1; // overwrite F = 1 3336 } 3337 def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, 3338 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3339 opc, "f16", asm, "", 3340 [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>, 3341 Requires<[HasNEON,HasFullFP16]> { 3342 let Inst{10} = 1; // overwrite F = 1 3343 } 3344 3345 // 128-bit vector types. 3346 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, 3347 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3348 opc, !strconcat(Dt, "8"), asm, "", 3349 [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>; 3350 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, 3351 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3352 opc, !strconcat(Dt, "16"), asm, "", 3353 [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>; 3354 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3355 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3356 opc, !strconcat(Dt, "32"), asm, "", 3357 [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>; 3358 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3359 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3360 opc, "f32", asm, "", 3361 [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> { 3362 let Inst{10} = 1; // overwrite F = 1 3363 } 3364 def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, 3365 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3366 opc, "f16", asm, "", 3367 [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>, 3368 Requires<[HasNEON,HasFullFP16]> { 3369 let Inst{10} = 1; // overwrite F = 1 3370 } 3371} 3372 3373// Neon 3-register comparisons. 3374class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3375 InstrItinClass itin, string OpcodeStr, string Dt, 3376 ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable> 3377 : N3V<op24, op23, op21_20, op11_8, 1, op4, 3378 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 3379 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3380 [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> { 3381 // All of these have a two-operand InstAlias. 3382 let TwoOperandAliasConstraint = "$Vn = $Vd"; 3383 let isCommutable = Commutable; 3384} 3385 3386class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3387 InstrItinClass itin, string OpcodeStr, string Dt, 3388 ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable> 3389 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3390 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3391 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3392 [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> { 3393 // All of these have a two-operand InstAlias. 3394 let TwoOperandAliasConstraint = "$Vn = $Vd"; 3395 let isCommutable = Commutable; 3396} 3397 3398multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4, 3399 InstrItinClass itinD16, InstrItinClass itinD32, 3400 InstrItinClass itinQ16, InstrItinClass itinQ32, 3401 string OpcodeStr, string Dt, 3402 PatFrag fc, bit Commutable = 0> { 3403 // 64-bit vector types. 3404 def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16, 3405 OpcodeStr, !strconcat(Dt, "8"), 3406 v8i8, v8i8, fc, Commutable>; 3407 def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16, 3408 OpcodeStr, !strconcat(Dt, "16"), 3409 v4i16, v4i16, fc, Commutable>; 3410 def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32, 3411 OpcodeStr, !strconcat(Dt, "32"), 3412 v2i32, v2i32, fc, Commutable>; 3413 3414 // 128-bit vector types. 3415 def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16, 3416 OpcodeStr, !strconcat(Dt, "8"), 3417 v16i8, v16i8, fc, Commutable>; 3418 def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16, 3419 OpcodeStr, !strconcat(Dt, "16"), 3420 v8i16, v8i16, fc, Commutable>; 3421 def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32, 3422 OpcodeStr, !strconcat(Dt, "32"), 3423 v4i32, v4i32, fc, Commutable>; 3424} 3425 3426 3427// Neon 2-register vector intrinsics, 3428// element sizes of 8, 16 and 32 bits: 3429multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3430 bits<5> op11_7, bit op4, 3431 InstrItinClass itinD, InstrItinClass itinQ, 3432 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3433 // 64-bit vector types. 3434 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3435 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; 3436 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3437 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; 3438 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3439 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; 3440 3441 // 128-bit vector types. 3442 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3443 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; 3444 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3445 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; 3446 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3447 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; 3448} 3449 3450 3451// Neon Narrowing 2-register vector operations, 3452// source operand element sizes of 16, 32 and 64 bits: 3453multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3454 bits<5> op11_7, bit op6, bit op4, 3455 InstrItinClass itin, string OpcodeStr, string Dt, 3456 SDNode OpNode> { 3457 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3458 itin, OpcodeStr, !strconcat(Dt, "16"), 3459 v8i8, v8i16, OpNode>; 3460 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3461 itin, OpcodeStr, !strconcat(Dt, "32"), 3462 v4i16, v4i32, OpNode>; 3463 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3464 itin, OpcodeStr, !strconcat(Dt, "64"), 3465 v2i32, v2i64, OpNode>; 3466} 3467 3468// Neon Narrowing 2-register vector intrinsics, 3469// source operand element sizes of 16, 32 and 64 bits: 3470multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3471 bits<5> op11_7, bit op6, bit op4, 3472 InstrItinClass itin, string OpcodeStr, string Dt, 3473 SDPatternOperator IntOp> { 3474 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3475 itin, OpcodeStr, !strconcat(Dt, "16"), 3476 v8i8, v8i16, IntOp>; 3477 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3478 itin, OpcodeStr, !strconcat(Dt, "32"), 3479 v4i16, v4i32, IntOp>; 3480 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3481 itin, OpcodeStr, !strconcat(Dt, "64"), 3482 v2i32, v2i64, IntOp>; 3483} 3484 3485 3486// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). 3487// source operand element sizes of 16, 32 and 64 bits: 3488multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, 3489 string OpcodeStr, string Dt, SDNode OpNode> { 3490 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3491 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; 3492 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3493 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; 3494 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3495 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; 3496} 3497 3498 3499// Neon 3-register vector operations. 3500 3501// First with only element sizes of 8, 16 and 32 bits: 3502multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3503 InstrItinClass itinD16, InstrItinClass itinD32, 3504 InstrItinClass itinQ16, InstrItinClass itinQ32, 3505 string OpcodeStr, string Dt, 3506 SDNode OpNode, bit Commutable = 0> { 3507 // 64-bit vector types. 3508 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, 3509 OpcodeStr, !strconcat(Dt, "8"), 3510 v8i8, v8i8, OpNode, Commutable>; 3511 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, 3512 OpcodeStr, !strconcat(Dt, "16"), 3513 v4i16, v4i16, OpNode, Commutable>; 3514 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, 3515 OpcodeStr, !strconcat(Dt, "32"), 3516 v2i32, v2i32, OpNode, Commutable>; 3517 3518 // 128-bit vector types. 3519 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, 3520 OpcodeStr, !strconcat(Dt, "8"), 3521 v16i8, v16i8, OpNode, Commutable>; 3522 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, 3523 OpcodeStr, !strconcat(Dt, "16"), 3524 v8i16, v8i16, OpNode, Commutable>; 3525 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, 3526 OpcodeStr, !strconcat(Dt, "32"), 3527 v4i32, v4i32, OpNode, Commutable>; 3528} 3529 3530multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { 3531 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; 3532 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; 3533 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; 3534 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", 3535 v4i32, v2i32, ShOp>; 3536} 3537 3538// ....then also with element size 64 bits: 3539multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3540 InstrItinClass itinD, InstrItinClass itinQ, 3541 string OpcodeStr, string Dt, 3542 SDNode OpNode, bit Commutable = 0> 3543 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ, 3544 OpcodeStr, Dt, OpNode, Commutable> { 3545 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD, 3546 OpcodeStr, !strconcat(Dt, "64"), 3547 v1i64, v1i64, OpNode, Commutable>; 3548 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ, 3549 OpcodeStr, !strconcat(Dt, "64"), 3550 v2i64, v2i64, OpNode, Commutable>; 3551} 3552 3553 3554// Neon 3-register vector intrinsics. 3555 3556// First with only element sizes of 16 and 32 bits: 3557multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3558 InstrItinClass itinD16, InstrItinClass itinD32, 3559 InstrItinClass itinQ16, InstrItinClass itinQ32, 3560 string OpcodeStr, string Dt, 3561 SDPatternOperator IntOp, bit Commutable = 0> { 3562 // 64-bit vector types. 3563 def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16, 3564 OpcodeStr, !strconcat(Dt, "16"), 3565 v4i16, v4i16, IntOp, Commutable>; 3566 def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32, 3567 OpcodeStr, !strconcat(Dt, "32"), 3568 v2i32, v2i32, IntOp, Commutable>; 3569 3570 // 128-bit vector types. 3571 def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16, 3572 OpcodeStr, !strconcat(Dt, "16"), 3573 v8i16, v8i16, IntOp, Commutable>; 3574 def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32, 3575 OpcodeStr, !strconcat(Dt, "32"), 3576 v4i32, v4i32, IntOp, Commutable>; 3577} 3578multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3579 InstrItinClass itinD16, InstrItinClass itinD32, 3580 InstrItinClass itinQ16, InstrItinClass itinQ32, 3581 string OpcodeStr, string Dt, 3582 SDPatternOperator IntOp> { 3583 // 64-bit vector types. 3584 def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16, 3585 OpcodeStr, !strconcat(Dt, "16"), 3586 v4i16, v4i16, IntOp>; 3587 def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32, 3588 OpcodeStr, !strconcat(Dt, "32"), 3589 v2i32, v2i32, IntOp>; 3590 3591 // 128-bit vector types. 3592 def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16, 3593 OpcodeStr, !strconcat(Dt, "16"), 3594 v8i16, v8i16, IntOp>; 3595 def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32, 3596 OpcodeStr, !strconcat(Dt, "32"), 3597 v4i32, v4i32, IntOp>; 3598} 3599 3600multiclass N3VIntSL_HS<bits<4> op11_8, 3601 InstrItinClass itinD16, InstrItinClass itinD32, 3602 InstrItinClass itinQ16, InstrItinClass itinQ32, 3603 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3604 def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, 3605 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; 3606 def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, 3607 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; 3608 def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, 3609 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; 3610 def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, 3611 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; 3612} 3613 3614// ....then also with element size of 8 bits: 3615multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3616 InstrItinClass itinD16, InstrItinClass itinD32, 3617 InstrItinClass itinQ16, InstrItinClass itinQ32, 3618 string OpcodeStr, string Dt, 3619 SDPatternOperator IntOp, bit Commutable = 0> 3620 : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3621 OpcodeStr, Dt, IntOp, Commutable> { 3622 def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16, 3623 OpcodeStr, !strconcat(Dt, "8"), 3624 v8i8, v8i8, IntOp, Commutable>; 3625 def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16, 3626 OpcodeStr, !strconcat(Dt, "8"), 3627 v16i8, v16i8, IntOp, Commutable>; 3628} 3629multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3630 InstrItinClass itinD16, InstrItinClass itinD32, 3631 InstrItinClass itinQ16, InstrItinClass itinQ32, 3632 string OpcodeStr, string Dt, 3633 SDPatternOperator IntOp> 3634 : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3635 OpcodeStr, Dt, IntOp> { 3636 def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16, 3637 OpcodeStr, !strconcat(Dt, "8"), 3638 v8i8, v8i8, IntOp>; 3639 def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16, 3640 OpcodeStr, !strconcat(Dt, "8"), 3641 v16i8, v16i8, IntOp>; 3642} 3643 3644 3645// ....then also with element size of 64 bits: 3646multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3647 InstrItinClass itinD16, InstrItinClass itinD32, 3648 InstrItinClass itinQ16, InstrItinClass itinQ32, 3649 string OpcodeStr, string Dt, 3650 SDPatternOperator IntOp, bit Commutable = 0> 3651 : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3652 OpcodeStr, Dt, IntOp, Commutable> { 3653 def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32, 3654 OpcodeStr, !strconcat(Dt, "64"), 3655 v1i64, v1i64, IntOp, Commutable>; 3656 def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32, 3657 OpcodeStr, !strconcat(Dt, "64"), 3658 v2i64, v2i64, IntOp, Commutable>; 3659} 3660multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3661 InstrItinClass itinD16, InstrItinClass itinD32, 3662 InstrItinClass itinQ16, InstrItinClass itinQ32, 3663 string OpcodeStr, string Dt, 3664 SDPatternOperator IntOp> 3665 : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3666 OpcodeStr, Dt, IntOp> { 3667 def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32, 3668 OpcodeStr, !strconcat(Dt, "64"), 3669 v1i64, v1i64, IntOp>; 3670 def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32, 3671 OpcodeStr, !strconcat(Dt, "64"), 3672 v2i64, v2i64, IntOp>; 3673} 3674 3675// Neon Narrowing 3-register vector intrinsics, 3676// source operand element sizes of 16, 32 and 64 bits: 3677multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3678 string OpcodeStr, string Dt, 3679 SDPatternOperator IntOp, bit Commutable = 0> { 3680 def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4, 3681 OpcodeStr, !strconcat(Dt, "16"), 3682 v8i8, v8i16, IntOp, Commutable>; 3683 def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, 3684 OpcodeStr, !strconcat(Dt, "32"), 3685 v4i16, v4i32, IntOp, Commutable>; 3686 def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, 3687 OpcodeStr, !strconcat(Dt, "64"), 3688 v2i32, v2i64, IntOp, Commutable>; 3689} 3690 3691 3692// Neon Long 3-register vector operations. 3693 3694multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3695 InstrItinClass itin16, InstrItinClass itin32, 3696 string OpcodeStr, string Dt, 3697 SDNode OpNode, bit Commutable = 0> { 3698 def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, 3699 OpcodeStr, !strconcat(Dt, "8"), 3700 v8i16, v8i8, OpNode, Commutable>; 3701 def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, 3702 OpcodeStr, !strconcat(Dt, "16"), 3703 v4i32, v4i16, OpNode, Commutable>; 3704 def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, 3705 OpcodeStr, !strconcat(Dt, "32"), 3706 v2i64, v2i32, OpNode, Commutable>; 3707} 3708 3709multiclass N3VLSL_HS<bit op24, bits<4> op11_8, 3710 InstrItinClass itin, string OpcodeStr, string Dt, 3711 SDNode OpNode> { 3712 def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr, 3713 !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; 3714 def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr, 3715 !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; 3716} 3717 3718multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3719 InstrItinClass itin16, InstrItinClass itin32, 3720 string OpcodeStr, string Dt, 3721 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { 3722 def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, 3723 OpcodeStr, !strconcat(Dt, "8"), 3724 v8i16, v8i8, OpNode, ExtOp, Commutable>; 3725 def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, 3726 OpcodeStr, !strconcat(Dt, "16"), 3727 v4i32, v4i16, OpNode, ExtOp, Commutable>; 3728 def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, 3729 OpcodeStr, !strconcat(Dt, "32"), 3730 v2i64, v2i32, OpNode, ExtOp, Commutable>; 3731} 3732 3733// Neon Long 3-register vector intrinsics. 3734 3735// First with only element sizes of 16 and 32 bits: 3736multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3737 InstrItinClass itin16, InstrItinClass itin32, 3738 string OpcodeStr, string Dt, 3739 SDPatternOperator IntOp, bit Commutable = 0> { 3740 def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, 3741 OpcodeStr, !strconcat(Dt, "16"), 3742 v4i32, v4i16, IntOp, Commutable>; 3743 def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, 3744 OpcodeStr, !strconcat(Dt, "32"), 3745 v2i64, v2i32, IntOp, Commutable>; 3746} 3747 3748multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, 3749 InstrItinClass itin, string OpcodeStr, string Dt, 3750 SDPatternOperator IntOp> { 3751 def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, 3752 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; 3753 def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, 3754 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3755} 3756 3757// ....then also with element size of 8 bits: 3758multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3759 InstrItinClass itin16, InstrItinClass itin32, 3760 string OpcodeStr, string Dt, 3761 SDPatternOperator IntOp, bit Commutable = 0> 3762 : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, 3763 IntOp, Commutable> { 3764 def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, 3765 OpcodeStr, !strconcat(Dt, "8"), 3766 v8i16, v8i8, IntOp, Commutable>; 3767} 3768 3769// ....with explicit extend (VABDL). 3770multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3771 InstrItinClass itin, string OpcodeStr, string Dt, 3772 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { 3773 def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, 3774 OpcodeStr, !strconcat(Dt, "8"), 3775 v8i16, v8i8, IntOp, ExtOp, Commutable>; 3776 def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, 3777 OpcodeStr, !strconcat(Dt, "16"), 3778 v4i32, v4i16, IntOp, ExtOp, Commutable>; 3779 def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, 3780 OpcodeStr, !strconcat(Dt, "32"), 3781 v2i64, v2i32, IntOp, ExtOp, Commutable>; 3782} 3783 3784 3785// Neon Wide 3-register vector intrinsics, 3786// source operand element sizes of 8, 16 and 32 bits: 3787multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3788 string OpcodeStr, string Dt, 3789 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { 3790 def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, 3791 OpcodeStr, !strconcat(Dt, "8"), 3792 v8i16, v8i8, OpNode, ExtOp, Commutable>; 3793 def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, 3794 OpcodeStr, !strconcat(Dt, "16"), 3795 v4i32, v4i16, OpNode, ExtOp, Commutable>; 3796 def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, 3797 OpcodeStr, !strconcat(Dt, "32"), 3798 v2i64, v2i32, OpNode, ExtOp, Commutable>; 3799} 3800 3801 3802// Neon Multiply-Op vector operations, 3803// element sizes of 8, 16 and 32 bits: 3804multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3805 InstrItinClass itinD16, InstrItinClass itinD32, 3806 InstrItinClass itinQ16, InstrItinClass itinQ32, 3807 string OpcodeStr, string Dt, SDNode OpNode> { 3808 // 64-bit vector types. 3809 def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16, 3810 OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>; 3811 def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16, 3812 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>; 3813 def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32, 3814 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>; 3815 3816 // 128-bit vector types. 3817 def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16, 3818 OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>; 3819 def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16, 3820 OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>; 3821 def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32, 3822 OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>; 3823} 3824 3825multiclass N3VMulOpSL_HS<bits<4> op11_8, 3826 InstrItinClass itinD16, InstrItinClass itinD32, 3827 InstrItinClass itinQ16, InstrItinClass itinQ32, 3828 string OpcodeStr, string Dt, SDPatternOperator ShOp> { 3829 def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, 3830 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; 3831 def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, 3832 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; 3833 def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, 3834 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, 3835 mul, ShOp>; 3836 def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, 3837 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, 3838 mul, ShOp>; 3839} 3840 3841// Neon Intrinsic-Op vector operations, 3842// element sizes of 8, 16 and 32 bits: 3843multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3844 InstrItinClass itinD, InstrItinClass itinQ, 3845 string OpcodeStr, string Dt, SDPatternOperator IntOp, 3846 SDNode OpNode> { 3847 // 64-bit vector types. 3848 def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, 3849 OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>; 3850 def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, 3851 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>; 3852 def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, 3853 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>; 3854 3855 // 128-bit vector types. 3856 def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, 3857 OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>; 3858 def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, 3859 OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>; 3860 def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, 3861 OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>; 3862} 3863 3864// Neon 3-argument intrinsics, 3865// element sizes of 16 and 32 bits: 3866multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3867 InstrItinClass itinD16, InstrItinClass itinD32, 3868 InstrItinClass itinQ16, InstrItinClass itinQ32, 3869 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3870 // 64-bit vector types. 3871 def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16, 3872 OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; 3873 def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32, 3874 OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; 3875 3876 // 128-bit vector types. 3877 def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16, 3878 OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; 3879 def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32, 3880 OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; 3881} 3882 3883// element sizes of 8, 16 and 32 bits: 3884multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3885 InstrItinClass itinD16, InstrItinClass itinD32, 3886 InstrItinClass itinQ16, InstrItinClass itinQ32, 3887 string OpcodeStr, string Dt, SDPatternOperator IntOp> 3888 :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32, 3889 itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{ 3890 // 64-bit vector types. 3891 def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16, 3892 OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; 3893 // 128-bit vector types. 3894 def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16, 3895 OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; 3896} 3897 3898// Neon Long Multiply-Op vector operations, 3899// element sizes of 8, 16 and 32 bits: 3900multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3901 InstrItinClass itin16, InstrItinClass itin32, 3902 string OpcodeStr, string Dt, SDNode MulOp, 3903 SDNode OpNode> { 3904 def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, 3905 !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>; 3906 def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, 3907 !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>; 3908 def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, 3909 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; 3910} 3911 3912multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr, 3913 string Dt, SDNode MulOp, SDNode OpNode> { 3914 def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr, 3915 !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>; 3916 def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr, 3917 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; 3918} 3919 3920 3921// Neon Long 3-argument intrinsics. 3922 3923// First with only element sizes of 16 and 32 bits: 3924multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3925 InstrItinClass itin16, InstrItinClass itin32, 3926 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3927 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, 3928 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; 3929 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, 3930 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3931} 3932 3933multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, 3934 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3935 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, 3936 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>; 3937 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, 3938 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3939} 3940 3941// ....then also with element size of 8 bits: 3942multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3943 InstrItinClass itin16, InstrItinClass itin32, 3944 string OpcodeStr, string Dt, SDPatternOperator IntOp> 3945 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { 3946 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, 3947 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; 3948} 3949 3950// ....with explicit extend (VABAL). 3951multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3952 InstrItinClass itin, string OpcodeStr, string Dt, 3953 SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { 3954 def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, 3955 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, 3956 IntOp, ExtOp, OpNode>; 3957 def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, 3958 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, 3959 IntOp, ExtOp, OpNode>; 3960 def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, 3961 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, 3962 IntOp, ExtOp, OpNode>; 3963} 3964 3965 3966// Neon Pairwise long 2-register intrinsics, 3967// element sizes of 8, 16 and 32 bits: 3968multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3969 bits<5> op11_7, bit op4, 3970 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3971 // 64-bit vector types. 3972 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3973 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3974 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3975 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3976 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3977 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 3978 3979 // 128-bit vector types. 3980 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3981 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 3982 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3983 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 3984 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3985 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 3986} 3987 3988 3989// Neon Pairwise long 2-register accumulate intrinsics, 3990// element sizes of 8, 16 and 32 bits: 3991multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3992 bits<5> op11_7, bit op4, 3993 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3994 // 64-bit vector types. 3995 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3996 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3997 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3998 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3999 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 4000 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 4001 4002 // 128-bit vector types. 4003 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 4004 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 4005 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 4006 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 4007 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 4008 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 4009} 4010 4011 4012// Neon 2-register vector shift by immediate, 4013// with f of either N2RegVShLFrm or N2RegVShRFrm 4014// element sizes of 8, 16, 32 and 64 bits: 4015multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 4016 InstrItinClass itin, string OpcodeStr, string Dt, 4017 SDNode OpNode> { 4018 // 64-bit vector types. 4019 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 4020 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 4021 let Inst{21-19} = 0b001; // imm6 = 001xxx 4022 } 4023 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 4024 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 4025 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4026 } 4027 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 4028 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 4029 let Inst{21} = 0b1; // imm6 = 1xxxxx 4030 } 4031 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 4032 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 4033 // imm6 = xxxxxx 4034 4035 // 128-bit vector types. 4036 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 4037 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 4038 let Inst{21-19} = 0b001; // imm6 = 001xxx 4039 } 4040 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 4041 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 4042 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4043 } 4044 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 4045 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 4046 let Inst{21} = 0b1; // imm6 = 1xxxxx 4047 } 4048 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 4049 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 4050 // imm6 = xxxxxx 4051} 4052multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 4053 InstrItinClass itin, string OpcodeStr, string Dt, 4054 string baseOpc, SDNode OpNode> { 4055 // 64-bit vector types. 4056 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 4057 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 4058 let Inst{21-19} = 0b001; // imm6 = 001xxx 4059 } 4060 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 4061 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 4062 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4063 } 4064 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 4065 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 4066 let Inst{21} = 0b1; // imm6 = 1xxxxx 4067 } 4068 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 4069 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 4070 // imm6 = xxxxxx 4071 4072 // 128-bit vector types. 4073 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 4074 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 4075 let Inst{21-19} = 0b001; // imm6 = 001xxx 4076 } 4077 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 4078 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 4079 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4080 } 4081 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 4082 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 4083 let Inst{21} = 0b1; // imm6 = 1xxxxx 4084 } 4085 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 4086 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 4087 // imm6 = xxxxxx 4088} 4089 4090// Neon Shift-Accumulate vector operations, 4091// element sizes of 8, 16, 32 and 64 bits: 4092multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 4093 string OpcodeStr, string Dt, SDNode ShOp> { 4094 // 64-bit vector types. 4095 def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8, 4096 OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> { 4097 let Inst{21-19} = 0b001; // imm6 = 001xxx 4098 } 4099 def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16, 4100 OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> { 4101 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4102 } 4103 def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32, 4104 OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> { 4105 let Inst{21} = 0b1; // imm6 = 1xxxxx 4106 } 4107 def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64, 4108 OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>; 4109 // imm6 = xxxxxx 4110 4111 // 128-bit vector types. 4112 def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8, 4113 OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> { 4114 let Inst{21-19} = 0b001; // imm6 = 001xxx 4115 } 4116 def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16, 4117 OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> { 4118 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4119 } 4120 def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32, 4121 OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> { 4122 let Inst{21} = 0b1; // imm6 = 1xxxxx 4123 } 4124 def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64, 4125 OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>; 4126 // imm6 = xxxxxx 4127} 4128 4129// Neon Shift-Insert vector operations, 4130// with f of either N2RegVShLFrm or N2RegVShRFrm 4131// element sizes of 8, 16, 32 and 64 bits: 4132multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 4133 string OpcodeStr> { 4134 // 64-bit vector types. 4135 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 4136 N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> { 4137 let Inst{21-19} = 0b001; // imm6 = 001xxx 4138 } 4139 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 4140 N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> { 4141 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4142 } 4143 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 4144 N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> { 4145 let Inst{21} = 0b1; // imm6 = 1xxxxx 4146 } 4147 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm, 4148 N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>; 4149 // imm6 = xxxxxx 4150 4151 // 128-bit vector types. 4152 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 4153 N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> { 4154 let Inst{21-19} = 0b001; // imm6 = 001xxx 4155 } 4156 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 4157 N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> { 4158 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4159 } 4160 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 4161 N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> { 4162 let Inst{21} = 0b1; // imm6 = 1xxxxx 4163 } 4164 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm, 4165 N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>; 4166 // imm6 = xxxxxx 4167} 4168multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 4169 string OpcodeStr> { 4170 // 64-bit vector types. 4171 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8, 4172 N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> { 4173 let Inst{21-19} = 0b001; // imm6 = 001xxx 4174 } 4175 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16, 4176 N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> { 4177 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4178 } 4179 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32, 4180 N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> { 4181 let Inst{21} = 0b1; // imm6 = 1xxxxx 4182 } 4183 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64, 4184 N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>; 4185 // imm6 = xxxxxx 4186 4187 // 128-bit vector types. 4188 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8, 4189 N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> { 4190 let Inst{21-19} = 0b001; // imm6 = 001xxx 4191 } 4192 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16, 4193 N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> { 4194 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4195 } 4196 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32, 4197 N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> { 4198 let Inst{21} = 0b1; // imm6 = 1xxxxx 4199 } 4200 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64, 4201 N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>; 4202 // imm6 = xxxxxx 4203} 4204 4205// Neon Shift Long operations, 4206// element sizes of 8, 16, 32 bits: 4207multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 4208 bit op4, string OpcodeStr, string Dt, 4209 SDPatternOperator OpNode> { 4210 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 4211 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { 4212 let Inst{21-19} = 0b001; // imm6 = 001xxx 4213 } 4214 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 4215 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { 4216 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4217 } 4218 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 4219 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { 4220 let Inst{21} = 0b1; // imm6 = 1xxxxx 4221 } 4222} 4223 4224// Neon Shift Narrow operations, 4225// element sizes of 16, 32, 64 bits: 4226multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 4227 bit op4, InstrItinClass itin, string OpcodeStr, string Dt, 4228 SDPatternOperator OpNode> { 4229 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 4230 OpcodeStr, !strconcat(Dt, "16"), 4231 v8i8, v8i16, shr_imm8, OpNode> { 4232 let Inst{21-19} = 0b001; // imm6 = 001xxx 4233 } 4234 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 4235 OpcodeStr, !strconcat(Dt, "32"), 4236 v4i16, v4i32, shr_imm16, OpNode> { 4237 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4238 } 4239 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 4240 OpcodeStr, !strconcat(Dt, "64"), 4241 v2i32, v2i64, shr_imm32, OpNode> { 4242 let Inst{21} = 0b1; // imm6 = 1xxxxx 4243 } 4244} 4245 4246//===----------------------------------------------------------------------===// 4247// Instruction Definitions. 4248//===----------------------------------------------------------------------===// 4249 4250// Vector Add Operations. 4251 4252// VADD : Vector Add (integer and floating-point) 4253defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", 4254 add, 1>; 4255def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", 4256 v2f32, v2f32, fadd, 1>; 4257def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", 4258 v4f32, v4f32, fadd, 1>; 4259def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16", 4260 v4f16, v4f16, fadd, 1>, 4261 Requires<[HasNEON,HasFullFP16]>; 4262def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16", 4263 v8f16, v8f16, fadd, 1>, 4264 Requires<[HasNEON,HasFullFP16]>; 4265// VADDL : Vector Add Long (Q = D + D) 4266defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 4267 "vaddl", "s", add, sext, 1>; 4268defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 4269 "vaddl", "u", add, zext, 1>; 4270// VADDW : Vector Add Wide (Q = Q + D) 4271defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; 4272defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; 4273// VHADD : Vector Halving Add 4274defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, 4275 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4276 "vhadd", "s", int_arm_neon_vhadds, 1>; 4277defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, 4278 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4279 "vhadd", "u", int_arm_neon_vhaddu, 1>; 4280// VRHADD : Vector Rounding Halving Add 4281defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, 4282 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4283 "vrhadd", "s", int_arm_neon_vrhadds, 1>; 4284defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, 4285 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4286 "vrhadd", "u", int_arm_neon_vrhaddu, 1>; 4287// VQADD : Vector Saturating Add 4288defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, 4289 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4290 "vqadd", "s", saddsat, 1>; 4291defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, 4292 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4293 "vqadd", "u", uaddsat, 1>; 4294// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) 4295defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; 4296// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) 4297defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", 4298 int_arm_neon_vraddhn, 1>; 4299 4300let Predicates = [HasNEON] in { 4301def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), 4302 (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; 4303def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), 4304 (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; 4305def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), 4306 (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; 4307} 4308 4309// Vector Multiply Operations. 4310 4311// VMUL : Vector Multiply (integer, polynomial and floating-point) 4312defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, 4313 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; 4314def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", 4315 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; 4316def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", 4317 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; 4318def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", 4319 v2f32, v2f32, fmul, 1>; 4320def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", 4321 v4f32, v4f32, fmul, 1>; 4322def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16", 4323 v4f16, v4f16, fmul, 1>, 4324 Requires<[HasNEON,HasFullFP16]>; 4325def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16", 4326 v8f16, v8f16, fmul, 1>, 4327 Requires<[HasNEON,HasFullFP16]>; 4328defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; 4329def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; 4330def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, 4331 v2f32, fmul>; 4332def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>, 4333 Requires<[HasNEON,HasFullFP16]>; 4334def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16, 4335 v4f16, fmul>, 4336 Requires<[HasNEON,HasFullFP16]>; 4337 4338let Predicates = [HasNEON] in { 4339def : Pat<(v8i16 (mul (v8i16 QPR:$src1), 4340 (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))), 4341 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), 4342 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4343 (DSubReg_i16_reg imm:$lane))), 4344 (SubReg_i16_lane imm:$lane)))>; 4345def : Pat<(v4i32 (mul (v4i32 QPR:$src1), 4346 (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))), 4347 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), 4348 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4349 (DSubReg_i32_reg imm:$lane))), 4350 (SubReg_i32_lane imm:$lane)))>; 4351def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), 4352 (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))), 4353 (v4f32 (VMULslfq (v4f32 QPR:$src1), 4354 (v2f32 (EXTRACT_SUBREG QPR:$src2, 4355 (DSubReg_i32_reg imm:$lane))), 4356 (SubReg_i32_lane imm:$lane)))>; 4357def : Pat<(v8f16 (fmul (v8f16 QPR:$src1), 4358 (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))), 4359 (v8f16 (VMULslhq(v8f16 QPR:$src1), 4360 (v4f16 (EXTRACT_SUBREG QPR:$src2, 4361 (DSubReg_i16_reg imm:$lane))), 4362 (SubReg_i16_lane imm:$lane)))>; 4363 4364def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))), 4365 (VMULslfd DPR:$Rn, 4366 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4367 (i32 0))>; 4368def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))), 4369 (VMULslhd DPR:$Rn, 4370 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0), 4371 (i32 0))>; 4372def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))), 4373 (VMULslfq QPR:$Rn, 4374 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4375 (i32 0))>; 4376def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))), 4377 (VMULslhq QPR:$Rn, 4378 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0), 4379 (i32 0))>; 4380} 4381 4382// VQDMULH : Vector Saturating Doubling Multiply Returning High Half 4383defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, 4384 IIC_VMULi16Q, IIC_VMULi32Q, 4385 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; 4386defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, 4387 IIC_VMULi16Q, IIC_VMULi32Q, 4388 "vqdmulh", "s", int_arm_neon_vqdmulh>; 4389 4390let Predicates = [HasNEON] in { 4391def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), 4392 (v8i16 (ARMvduplane (v8i16 QPR:$src2), 4393 imm:$lane)))), 4394 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), 4395 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4396 (DSubReg_i16_reg imm:$lane))), 4397 (SubReg_i16_lane imm:$lane)))>; 4398def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), 4399 (v4i32 (ARMvduplane (v4i32 QPR:$src2), 4400 imm:$lane)))), 4401 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), 4402 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4403 (DSubReg_i32_reg imm:$lane))), 4404 (SubReg_i32_lane imm:$lane)))>; 4405} 4406 4407// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half 4408defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, 4409 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, 4410 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; 4411defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, 4412 IIC_VMULi16Q, IIC_VMULi32Q, 4413 "vqrdmulh", "s", int_arm_neon_vqrdmulh>; 4414 4415let Predicates = [HasNEON] in { 4416def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), 4417 (v8i16 (ARMvduplane (v8i16 QPR:$src2), 4418 imm:$lane)))), 4419 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), 4420 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4421 (DSubReg_i16_reg imm:$lane))), 4422 (SubReg_i16_lane imm:$lane)))>; 4423def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), 4424 (v4i32 (ARMvduplane (v4i32 QPR:$src2), 4425 imm:$lane)))), 4426 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), 4427 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4428 (DSubReg_i32_reg imm:$lane))), 4429 (SubReg_i32_lane imm:$lane)))>; 4430} 4431 4432// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) 4433let PostEncoderMethod = "NEONThumb2DataIPostEncoder", 4434 DecoderNamespace = "NEONData" in { 4435 defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4436 "vmull", "s", NEONvmulls, 1>; 4437 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4438 "vmull", "u", NEONvmullu, 1>; 4439 def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", 4440 v8i16, v8i8, int_arm_neon_vmullp, 1>; 4441 def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, 4442 "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, 4443 Requires<[HasV8, HasCrypto]>; 4444} 4445defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; 4446defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; 4447 4448// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) 4449defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, 4450 "vqdmull", "s", int_arm_neon_vqdmull, 1>; 4451defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, 4452 "vqdmull", "s", int_arm_neon_vqdmull>; 4453 4454// Vector Multiply-Accumulate and Multiply-Subtract Operations. 4455 4456// VMLA : Vector Multiply Accumulate (integer and floating-point) 4457defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4458 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4459def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", 4460 v2f32, fmul_su, fadd_mlx>, 4461 Requires<[HasNEON, UseFPVMLx]>; 4462def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", 4463 v4f32, fmul_su, fadd_mlx>, 4464 Requires<[HasNEON, UseFPVMLx]>; 4465def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", 4466 v4f16, fmul_su, fadd_mlx>, 4467 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4468def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", 4469 v8f16, fmul_su, fadd_mlx>, 4470 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4471defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, 4472 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4473def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", 4474 v2f32, fmul_su, fadd_mlx>, 4475 Requires<[HasNEON, UseFPVMLx]>; 4476def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", 4477 v4f32, v2f32, fmul_su, fadd_mlx>, 4478 Requires<[HasNEON, UseFPVMLx]>; 4479def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16", 4480 v4f16, fmul, fadd>, 4481 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4482def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16", 4483 v8f16, v4f16, fmul, fadd>, 4484 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4485 4486let Predicates = [HasNEON] in { 4487def : Pat<(v8i16 (add (v8i16 QPR:$src1), 4488 (mul (v8i16 QPR:$src2), 4489 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))), 4490 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4491 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4492 (DSubReg_i16_reg imm:$lane))), 4493 (SubReg_i16_lane imm:$lane)))>; 4494 4495def : Pat<(v4i32 (add (v4i32 QPR:$src1), 4496 (mul (v4i32 QPR:$src2), 4497 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))), 4498 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4499 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4500 (DSubReg_i32_reg imm:$lane))), 4501 (SubReg_i32_lane imm:$lane)))>; 4502} 4503 4504def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), 4505 (fmul_su (v4f32 QPR:$src2), 4506 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))), 4507 (v4f32 (VMLAslfq (v4f32 QPR:$src1), 4508 (v4f32 QPR:$src2), 4509 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4510 (DSubReg_i32_reg imm:$lane))), 4511 (SubReg_i32_lane imm:$lane)))>, 4512 Requires<[HasNEON, UseFPVMLx]>; 4513 4514// VMLAL : Vector Multiply Accumulate Long (Q += D * D) 4515defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4516 "vmlal", "s", NEONvmulls, add>; 4517defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4518 "vmlal", "u", NEONvmullu, add>; 4519 4520defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; 4521defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; 4522 4523let Predicates = [HasNEON, HasV8_1a] in { 4524 // v8.1a Neon Rounding Double Multiply-Op vector operations, 4525 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long 4526 // (Q += D * D) 4527 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, 4528 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4529 null_frag>; 4530 def : Pat<(v4i16 (saddsat 4531 (v4i16 DPR:$src1), 4532 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4533 (v4i16 DPR:$Vm))))), 4534 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4535 def : Pat<(v2i32 (saddsat 4536 (v2i32 DPR:$src1), 4537 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4538 (v2i32 DPR:$Vm))))), 4539 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4540 def : Pat<(v8i16 (saddsat 4541 (v8i16 QPR:$src1), 4542 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4543 (v8i16 QPR:$Vm))))), 4544 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4545 def : Pat<(v4i32 (saddsat 4546 (v4i32 QPR:$src1), 4547 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4548 (v4i32 QPR:$Vm))))), 4549 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4550 4551 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, 4552 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4553 null_frag>; 4554 def : Pat<(v4i16 (saddsat 4555 (v4i16 DPR:$src1), 4556 (v4i16 (int_arm_neon_vqrdmulh 4557 (v4i16 DPR:$Vn), 4558 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4559 imm:$lane)))))), 4560 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, 4561 imm:$lane))>; 4562 def : Pat<(v2i32 (saddsat 4563 (v2i32 DPR:$src1), 4564 (v2i32 (int_arm_neon_vqrdmulh 4565 (v2i32 DPR:$Vn), 4566 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4567 imm:$lane)))))), 4568 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4569 imm:$lane))>; 4570 def : Pat<(v8i16 (saddsat 4571 (v8i16 QPR:$src1), 4572 (v8i16 (int_arm_neon_vqrdmulh 4573 (v8i16 QPR:$src2), 4574 (v8i16 (ARMvduplane (v8i16 QPR:$src3), 4575 imm:$lane)))))), 4576 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), 4577 (v8i16 QPR:$src2), 4578 (v4i16 (EXTRACT_SUBREG 4579 QPR:$src3, 4580 (DSubReg_i16_reg imm:$lane))), 4581 (SubReg_i16_lane imm:$lane)))>; 4582 def : Pat<(v4i32 (saddsat 4583 (v4i32 QPR:$src1), 4584 (v4i32 (int_arm_neon_vqrdmulh 4585 (v4i32 QPR:$src2), 4586 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 4587 imm:$lane)))))), 4588 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), 4589 (v4i32 QPR:$src2), 4590 (v2i32 (EXTRACT_SUBREG 4591 QPR:$src3, 4592 (DSubReg_i32_reg imm:$lane))), 4593 (SubReg_i32_lane imm:$lane)))>; 4594 4595 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long 4596 // (Q -= D * D) 4597 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, 4598 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4599 null_frag>; 4600 def : Pat<(v4i16 (ssubsat 4601 (v4i16 DPR:$src1), 4602 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4603 (v4i16 DPR:$Vm))))), 4604 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4605 def : Pat<(v2i32 (ssubsat 4606 (v2i32 DPR:$src1), 4607 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4608 (v2i32 DPR:$Vm))))), 4609 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4610 def : Pat<(v8i16 (ssubsat 4611 (v8i16 QPR:$src1), 4612 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4613 (v8i16 QPR:$Vm))))), 4614 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4615 def : Pat<(v4i32 (ssubsat 4616 (v4i32 QPR:$src1), 4617 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4618 (v4i32 QPR:$Vm))))), 4619 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4620 4621 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, 4622 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4623 null_frag>; 4624 def : Pat<(v4i16 (ssubsat 4625 (v4i16 DPR:$src1), 4626 (v4i16 (int_arm_neon_vqrdmulh 4627 (v4i16 DPR:$Vn), 4628 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4629 imm:$lane)))))), 4630 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; 4631 def : Pat<(v2i32 (ssubsat 4632 (v2i32 DPR:$src1), 4633 (v2i32 (int_arm_neon_vqrdmulh 4634 (v2i32 DPR:$Vn), 4635 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4636 imm:$lane)))))), 4637 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4638 imm:$lane))>; 4639 def : Pat<(v8i16 (ssubsat 4640 (v8i16 QPR:$src1), 4641 (v8i16 (int_arm_neon_vqrdmulh 4642 (v8i16 QPR:$src2), 4643 (v8i16 (ARMvduplane (v8i16 QPR:$src3), 4644 imm:$lane)))))), 4645 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), 4646 (v8i16 QPR:$src2), 4647 (v4i16 (EXTRACT_SUBREG 4648 QPR:$src3, 4649 (DSubReg_i16_reg imm:$lane))), 4650 (SubReg_i16_lane imm:$lane)))>; 4651 def : Pat<(v4i32 (ssubsat 4652 (v4i32 QPR:$src1), 4653 (v4i32 (int_arm_neon_vqrdmulh 4654 (v4i32 QPR:$src2), 4655 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 4656 imm:$lane)))))), 4657 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), 4658 (v4i32 QPR:$src2), 4659 (v2i32 (EXTRACT_SUBREG 4660 QPR:$src3, 4661 (DSubReg_i32_reg imm:$lane))), 4662 (SubReg_i32_lane imm:$lane)))>; 4663} 4664// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) 4665defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4666 "vqdmlal", "s", null_frag>; 4667defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4668 4669let Predicates = [HasNEON] in { 4670def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), 4671 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4672 (v4i16 DPR:$Vm))))), 4673 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4674def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), 4675 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4676 (v2i32 DPR:$Vm))))), 4677 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4678def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), 4679 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4680 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4681 imm:$lane)))))), 4682 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4683def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), 4684 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4685 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4686 imm:$lane)))))), 4687 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4688} 4689 4690// VMLS : Vector Multiply Subtract (integer and floating-point) 4691defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4692 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4693def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4694 v2f32, fmul_su, fsub_mlx>, 4695 Requires<[HasNEON, UseFPVMLx]>; 4696def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4697 v4f32, fmul_su, fsub_mlx>, 4698 Requires<[HasNEON, UseFPVMLx]>; 4699def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", 4700 v4f16, fmul, fsub>, 4701 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4702def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", 4703 v8f16, fmul, fsub>, 4704 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4705defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4706 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4707def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4708 v2f32, fmul_su, fsub_mlx>, 4709 Requires<[HasNEON, UseFPVMLx]>; 4710def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4711 v4f32, v2f32, fmul_su, fsub_mlx>, 4712 Requires<[HasNEON, UseFPVMLx]>; 4713def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", 4714 v4f16, fmul, fsub>, 4715 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4716def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", 4717 v8f16, v4f16, fmul, fsub>, 4718 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4719 4720let Predicates = [HasNEON] in { 4721def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4722 (mul (v8i16 QPR:$src2), 4723 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))), 4724 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4725 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4726 (DSubReg_i16_reg imm:$lane))), 4727 (SubReg_i16_lane imm:$lane)))>; 4728 4729def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4730 (mul (v4i32 QPR:$src2), 4731 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))), 4732 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4733 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4734 (DSubReg_i32_reg imm:$lane))), 4735 (SubReg_i32_lane imm:$lane)))>; 4736} 4737 4738def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4739 (fmul_su (v4f32 QPR:$src2), 4740 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))), 4741 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4742 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4743 (DSubReg_i32_reg imm:$lane))), 4744 (SubReg_i32_lane imm:$lane)))>, 4745 Requires<[HasNEON, UseFPVMLx]>; 4746 4747// VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4748defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4749 "vmlsl", "s", NEONvmulls, sub>; 4750defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4751 "vmlsl", "u", NEONvmullu, sub>; 4752 4753defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; 4754defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; 4755 4756// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4757defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4758 "vqdmlsl", "s", null_frag>; 4759defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; 4760 4761let Predicates = [HasNEON] in { 4762def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), 4763 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4764 (v4i16 DPR:$Vm))))), 4765 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4766def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), 4767 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4768 (v2i32 DPR:$Vm))))), 4769 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4770def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), 4771 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4772 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4773 imm:$lane)))))), 4774 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4775def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), 4776 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4777 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4778 imm:$lane)))))), 4779 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4780} 4781 4782// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 4783def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4784 v2f32, fmul_su, fadd_mlx>, 4785 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4786 4787def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4788 v4f32, fmul_su, fadd_mlx>, 4789 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4790def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", 4791 v4f16, fmul, fadd>, 4792 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4793 4794def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", 4795 v8f16, fmul, fadd>, 4796 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4797 4798// Fused Vector Multiply Subtract (floating-point) 4799def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4800 v2f32, fmul_su, fsub_mlx>, 4801 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4802def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4803 v4f32, fmul_su, fsub_mlx>, 4804 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4805def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", 4806 v4f16, fmul, fsub>, 4807 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4808def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", 4809 v8f16, fmul, fsub>, 4810 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4811 4812// Match @llvm.fma.* intrinsics 4813def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4814 (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4815 Requires<[HasNEON,HasFullFP16]>; 4816def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4817 (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4818 Requires<[HasNEON,HasFullFP16]>; 4819def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4820 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4821 Requires<[HasNEON,HasVFP4]>; 4822def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4823 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4824 Requires<[HasNEON,HasVFP4]>; 4825def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4826 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4827 Requires<[HasNEON,HasVFP4]>; 4828def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4829 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4830 Requires<[HasNEON,HasVFP4]>; 4831 4832// ARMv8.2a dot product instructions. 4833// We put them in the VFPV8 decoder namespace because the ARM and Thumb 4834// encodings are the same and thus no further bit twiddling is necessary 4835// in the disassembler. 4836class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy, 4837 ValueType AccumTy, ValueType InputTy, 4838 SDPatternOperator OpNode> : 4839 N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst), 4840 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, 4841 Asm, AsmTy, 4842 [(set (AccumTy RegTy:$dst), 4843 (OpNode (AccumTy RegTy:$Vd), 4844 (InputTy RegTy:$Vn), 4845 (InputTy RegTy:$Vm)))]> { 4846 let Predicates = [HasDotProd]; 4847 let DecoderNamespace = "VFPV8"; 4848 let Constraints = "$dst = $Vd"; 4849} 4850 4851def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>; 4852def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>; 4853def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>; 4854def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>; 4855 4856// Indexed dot product instructions: 4857multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty, 4858 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode, 4859 dag RHS> { 4860 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst), 4861 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 4862 N3RegFrm, IIC_VDOTPROD, opc, dt, []> { 4863 bit lane; 4864 let Inst{5} = lane; 4865 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); 4866 let Constraints = "$dst = $Vd"; 4867 let Predicates = [HasDotProd]; 4868 let DecoderNamespace = "VFPV8"; 4869 } 4870 4871 def : Pat< 4872 (AccumType (OpNode (AccumType Ty:$Vd), 4873 (InputType Ty:$Vn), 4874 (InputType (bitconvert (AccumType 4875 (ARMvduplane (AccumType Ty:$Vm), 4876 VectorIndex32:$lane)))))), 4877 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>; 4878} 4879 4880defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8, 4881 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>; 4882defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8, 4883 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>; 4884defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8, 4885 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4886defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8, 4887 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4888 4889 4890// ARMv8.3 complex operations 4891class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q, 4892 InstrItinClass itin, dag oops, dag iops, 4893 string opc, string dt, list<dag> pattern> 4894 : N3VCP8<{?,?}, {op21,s}, q, op4, oops, 4895 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ 4896 bits<2> rot; 4897 let Inst{24-23} = rot; 4898} 4899 4900class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q, 4901 InstrItinClass itin, dag oops, dag iops, string opc, 4902 string dt, list<dag> pattern> 4903 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, 4904 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { 4905 bits<1> rot; 4906 let Inst{24} = rot; 4907} 4908 4909class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin, 4910 dag oops, dag iops, string opc, string dt, 4911 list<dag> pattern> 4912 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4913 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { 4914 bits<2> rot; 4915 bit lane; 4916 4917 let Inst{21-20} = rot; 4918 let Inst{5} = lane; 4919} 4920 4921class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin, 4922 dag oops, dag iops, string opc, string dt, 4923 list<dag> pattern> 4924 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4925 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { 4926 bits<2> rot; 4927 bit lane; 4928 4929 let Inst{21-20} = rot; 4930 let Inst{5} = Vm{4}; 4931 // This is needed because the lane operand does not have any bits in the 4932 // encoding (it only has one possible value), so we need to manually set it 4933 // to it's default value. 4934 let DecoderMethod = "DecodeNEONComplexLane64Instruction"; 4935} 4936 4937multiclass N3VCP8ComplexTied<bit op21, bit op4, 4938 string OpcodeStr, SDPatternOperator Op> { 4939 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4940 def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd), 4941 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 4942 OpcodeStr, "f16", []>; 4943 def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd), 4944 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 4945 OpcodeStr, "f16", []>; 4946 } 4947 let Predicates = [HasNEON,HasV8_3a] in { 4948 def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd), 4949 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 4950 OpcodeStr, "f32", []>; 4951 def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd), 4952 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 4953 OpcodeStr, "f32", []>; 4954 } 4955} 4956 4957multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4, 4958 string OpcodeStr, SDPatternOperator Op> { 4959 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4960 def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD, 4961 (outs DPR:$Vd), 4962 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), 4963 OpcodeStr, "f16", []>; 4964 def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ, 4965 (outs QPR:$Vd), 4966 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 4967 OpcodeStr, "f16", []>; 4968 } 4969 let Predicates = [HasNEON,HasV8_3a] in { 4970 def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD, 4971 (outs DPR:$Vd), 4972 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), 4973 OpcodeStr, "f32", []>; 4974 def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ, 4975 (outs QPR:$Vd), 4976 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 4977 OpcodeStr, "f32", []>; 4978 } 4979} 4980 4981// These instructions index by pairs of lanes, so the VectorIndexes are twice 4982// as wide as the data types. 4983multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr, 4984 SDPatternOperator Op> { 4985 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4986 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD, 4987 (outs DPR:$Vd), 4988 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4989 VectorIndex32:$lane, complexrotateop:$rot), 4990 OpcodeStr, "f16", []>; 4991 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ, 4992 (outs QPR:$Vd), 4993 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, 4994 VectorIndex32:$lane, complexrotateop:$rot), 4995 OpcodeStr, "f16", []>; 4996 } 4997 let Predicates = [HasNEON,HasV8_3a] in { 4998 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD, 4999 (outs DPR:$Vd), 5000 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 5001 complexrotateop:$rot), 5002 OpcodeStr, "f32", []>; 5003 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ, 5004 (outs QPR:$Vd), 5005 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 5006 complexrotateop:$rot), 5007 OpcodeStr, "f32", []>; 5008 } 5009} 5010 5011defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; 5012defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; 5013defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; 5014 5015let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 5016 def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), 5017 (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>; 5018 def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), 5019 (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>; 5020 def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), 5021 (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>; 5022 def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), 5023 (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>; 5024} 5025let Predicates = [HasNEON,HasV8_3a] in { 5026 def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), 5027 (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>; 5028 def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), 5029 (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>; 5030 def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), 5031 (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>; 5032 def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), 5033 (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>; 5034} 5035 5036// Vector Subtract Operations. 5037 5038// VSUB : Vector Subtract (integer and floating-point) 5039defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, 5040 "vsub", "i", sub, 0>; 5041def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", 5042 v2f32, v2f32, fsub, 0>; 5043def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", 5044 v4f32, v4f32, fsub, 0>; 5045def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16", 5046 v4f16, v4f16, fsub, 0>, 5047 Requires<[HasNEON,HasFullFP16]>; 5048def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", 5049 v8f16, v8f16, fsub, 0>, 5050 Requires<[HasNEON,HasFullFP16]>; 5051// VSUBL : Vector Subtract Long (Q = D - D) 5052defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 5053 "vsubl", "s", sub, sext, 0>; 5054defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 5055 "vsubl", "u", sub, zext, 0>; 5056// VSUBW : Vector Subtract Wide (Q = Q - D) 5057defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; 5058defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; 5059// VHSUB : Vector Halving Subtract 5060defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, 5061 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5062 "vhsub", "s", int_arm_neon_vhsubs, 0>; 5063defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, 5064 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5065 "vhsub", "u", int_arm_neon_vhsubu, 0>; 5066// VQSUB : Vector Saturing Subtract 5067defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, 5068 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5069 "vqsub", "s", ssubsat, 0>; 5070defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, 5071 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5072 "vqsub", "u", usubsat, 0>; 5073// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) 5074defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; 5075// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) 5076defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", 5077 int_arm_neon_vrsubhn, 0>; 5078 5079let Predicates = [HasNEON] in { 5080def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), 5081 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; 5082def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), 5083 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; 5084def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), 5085 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; 5086} 5087 5088// Vector Comparisons. 5089 5090// VCEQ : Vector Compare Equal 5091defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5092 IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>; 5093def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, 5094 ARMCCeq, 1>; 5095def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, 5096 ARMCCeq, 1>; 5097def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, 5098 ARMCCeq, 1>, 5099 Requires<[HasNEON, HasFullFP16]>; 5100def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, 5101 ARMCCeq, 1>, 5102 Requires<[HasNEON, HasFullFP16]>; 5103 5104let TwoOperandAliasConstraint = "$Vm = $Vd" in 5105defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", 5106 "$Vd, $Vm, #0", ARMCCeq>; 5107 5108// VCGE : Vector Compare Greater Than or Equal 5109defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5110 IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>; 5111defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5112 IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>; 5113def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, 5114 ARMCCge, 0>; 5115def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, 5116 ARMCCge, 0>; 5117def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, 5118 ARMCCge, 0>, 5119 Requires<[HasNEON, HasFullFP16]>; 5120def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, 5121 ARMCCge, 0>, 5122 Requires<[HasNEON, HasFullFP16]>; 5123 5124let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5125defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", 5126 "$Vd, $Vm, #0", ARMCCge>; 5127defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", 5128 "$Vd, $Vm, #0", ARMCCle>; 5129} 5130 5131// VCGT : Vector Compare Greater Than 5132defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5133 IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>; 5134defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5135 IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>; 5136def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, 5137 ARMCCgt, 0>; 5138def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, 5139 ARMCCgt, 0>; 5140def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, 5141 ARMCCgt, 0>, 5142 Requires<[HasNEON, HasFullFP16]>; 5143def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, 5144 ARMCCgt, 0>, 5145 Requires<[HasNEON, HasFullFP16]>; 5146 5147let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5148defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", 5149 "$Vd, $Vm, #0", ARMCCgt>; 5150defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", 5151 "$Vd, $Vm, #0", ARMCClt>; 5152} 5153 5154// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) 5155def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 5156 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; 5157def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 5158 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; 5159def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 5160 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>, 5161 Requires<[HasNEON, HasFullFP16]>; 5162def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 5163 "f16", v8i16, v8f16, int_arm_neon_vacge, 0>, 5164 Requires<[HasNEON, HasFullFP16]>; 5165// VACGT : Vector Absolute Compare Greater Than (aka VCAGT) 5166def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", 5167 "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>; 5168def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", 5169 "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>; 5170def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", 5171 "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>, 5172 Requires<[HasNEON, HasFullFP16]>; 5173def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", 5174 "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>, 5175 Requires<[HasNEON, HasFullFP16]>; 5176// VTST : Vector Test Bits 5177defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 5178 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; 5179 5180def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 5181 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5182def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 5183 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5184def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 5185 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5186def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 5187 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5188let Predicates = [HasNEON, HasFullFP16] in { 5189def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", 5190 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5191def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", 5192 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5193def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", 5194 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5195def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", 5196 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5197} 5198 5199// +fp16fml Floating Point Multiplication Variants 5200let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in { 5201 5202class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn, 5203 RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3> 5204 : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary, 5205 asm, "f16", "$Vd, $Vn, $Vm", "", []>; 5206 5207class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn, 5208 RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3> 5209 : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary, 5210 asm, "f16", "$Vd, $Vn, $Vm", "", []>; 5211 5212// Vd, Vs, Vs[0-15], Idx[0-1] 5213class VFMD<string opc, string type, bits<2> S> 5214 : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd), 5215 (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx), 5216 IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> { 5217 bit idx; 5218 let Inst{3} = idx; 5219 let Inst{19-16} = Vn{4-1}; 5220 let Inst{7} = Vn{0}; 5221 let Inst{5} = Vm{0}; 5222 let Inst{2-0} = Vm{3-1}; 5223} 5224 5225// Vq, Vd, Vd[0-7], Idx[0-3] 5226class VFMQ<string opc, string type, bits<2> S> 5227 : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd), 5228 (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx), 5229 IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> { 5230 bits<2> idx; 5231 let Inst{5} = idx{1}; 5232 let Inst{3} = idx{0}; 5233} 5234 5235let hasNoSchedulingInfo = 1 in { 5236// op1 op2 op3 5237def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>; 5238def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>; 5239def VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>; 5240def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>; 5241def VFMALDI : VFMD<"vfmal", "f16", 0b00>; 5242def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>; 5243def VFMALQI : VFMQ<"vfmal", "f16", 0b00>; 5244def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>; 5245} 5246} // HasNEON, HasFP16FML 5247 5248 5249def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", 5250 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 5251def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", 5252 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 5253def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 5254 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 5255def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 5256 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 5257let Predicates = [HasNEON, HasFullFP16] in { 5258def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", 5259 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 5260def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", 5261 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 5262def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", 5263 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 5264def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", 5265 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 5266} 5267 5268// Vector Bitwise Operations. 5269 5270def vnotd : PatFrag<(ops node:$in), 5271 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; 5272def vnotq : PatFrag<(ops node:$in), 5273 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; 5274 5275 5276// VAND : Vector Bitwise AND 5277def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", 5278 v2i32, v2i32, and, 1>; 5279def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", 5280 v4i32, v4i32, and, 1>; 5281 5282// VEOR : Vector Bitwise Exclusive OR 5283def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", 5284 v2i32, v2i32, xor, 1>; 5285def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", 5286 v4i32, v4i32, xor, 1>; 5287 5288// VORR : Vector Bitwise OR 5289def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", 5290 v2i32, v2i32, or, 1>; 5291def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", 5292 v4i32, v4i32, or, 1>; 5293 5294def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, 5295 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 5296 IIC_VMOVImm, 5297 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", 5298 [(set DPR:$Vd, 5299 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { 5300 let Inst{9} = SIMM{9}; 5301} 5302 5303def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, 5304 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), 5305 IIC_VMOVImm, 5306 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", 5307 [(set DPR:$Vd, 5308 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { 5309 let Inst{10-9} = SIMM{10-9}; 5310} 5311 5312def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, 5313 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), 5314 IIC_VMOVImm, 5315 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", 5316 [(set QPR:$Vd, 5317 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { 5318 let Inst{9} = SIMM{9}; 5319} 5320 5321def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, 5322 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), 5323 IIC_VMOVImm, 5324 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", 5325 [(set QPR:$Vd, 5326 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { 5327 let Inst{10-9} = SIMM{10-9}; 5328} 5329 5330 5331// VBIC : Vector Bitwise Bit Clear (AND NOT) 5332let TwoOperandAliasConstraint = "$Vn = $Vd" in { 5333def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 5334 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 5335 "vbic", "$Vd, $Vn, $Vm", "", 5336 [(set DPR:$Vd, (v2i32 (and DPR:$Vn, 5337 (vnotd DPR:$Vm))))]>; 5338def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 5339 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 5340 "vbic", "$Vd, $Vn, $Vm", "", 5341 [(set QPR:$Vd, (v4i32 (and QPR:$Vn, 5342 (vnotq QPR:$Vm))))]>; 5343} 5344 5345def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, 5346 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 5347 IIC_VMOVImm, 5348 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 5349 [(set DPR:$Vd, 5350 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { 5351 let Inst{9} = SIMM{9}; 5352} 5353 5354def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, 5355 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), 5356 IIC_VMOVImm, 5357 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 5358 [(set DPR:$Vd, 5359 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { 5360 let Inst{10-9} = SIMM{10-9}; 5361} 5362 5363def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, 5364 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), 5365 IIC_VMOVImm, 5366 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 5367 [(set QPR:$Vd, 5368 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { 5369 let Inst{9} = SIMM{9}; 5370} 5371 5372def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, 5373 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), 5374 IIC_VMOVImm, 5375 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 5376 [(set QPR:$Vd, 5377 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { 5378 let Inst{10-9} = SIMM{10-9}; 5379} 5380 5381// VORN : Vector Bitwise OR NOT 5382def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), 5383 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 5384 "vorn", "$Vd, $Vn, $Vm", "", 5385 [(set DPR:$Vd, (v2i32 (or DPR:$Vn, 5386 (vnotd DPR:$Vm))))]>; 5387def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), 5388 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 5389 "vorn", "$Vd, $Vn, $Vm", "", 5390 [(set QPR:$Vd, (v4i32 (or QPR:$Vn, 5391 (vnotq QPR:$Vm))))]>; 5392 5393// VMVN : Vector Bitwise NOT (Immediate) 5394 5395let isReMaterializable = 1 in { 5396 5397def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), 5398 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5399 "vmvn", "i16", "$Vd, $SIMM", "", 5400 [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> { 5401 let Inst{9} = SIMM{9}; 5402} 5403 5404def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), 5405 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5406 "vmvn", "i16", "$Vd, $SIMM", "", 5407 [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> { 5408 let Inst{9} = SIMM{9}; 5409} 5410 5411def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), 5412 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5413 "vmvn", "i32", "$Vd, $SIMM", "", 5414 [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> { 5415 let Inst{11-8} = SIMM{11-8}; 5416} 5417 5418def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), 5419 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5420 "vmvn", "i32", "$Vd, $SIMM", "", 5421 [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> { 5422 let Inst{11-8} = SIMM{11-8}; 5423} 5424} 5425 5426// VMVN : Vector Bitwise NOT 5427def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, 5428 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, 5429 "vmvn", "$Vd, $Vm", "", 5430 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; 5431def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, 5432 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, 5433 "vmvn", "$Vd, $Vm", "", 5434 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; 5435let Predicates = [HasNEON] in { 5436def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; 5437def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; 5438} 5439 5440// VBSL : Vector Bitwise Select 5441def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 5442 (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5443 N3RegFrm, IIC_VCNTiD, 5444 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5445 [(set DPR:$Vd, 5446 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; 5447let Predicates = [HasNEON] in { 5448def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), 5449 (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), 5450 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5451def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), 5452 (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), 5453 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5454def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), 5455 (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), 5456 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5457def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), 5458 (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), 5459 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5460def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), 5461 (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), 5462 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>; 5463 5464def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), 5465 (and DPR:$Vm, (vnotd DPR:$Vd)))), 5466 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; 5467 5468def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), 5469 (and DPR:$Vm, (vnotd DPR:$Vd)))), 5470 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; 5471} 5472 5473def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 5474 (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5475 N3RegFrm, IIC_VCNTiQ, 5476 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5477 [(set QPR:$Vd, 5478 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; 5479 5480let Predicates = [HasNEON] in { 5481def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), 5482 (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), 5483 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5484def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), 5485 (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), 5486 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5487def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), 5488 (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), 5489 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5490def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), 5491 (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), 5492 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5493def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), 5494 (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), 5495 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>; 5496 5497def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), 5498 (and QPR:$Vm, (vnotq QPR:$Vd)))), 5499 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; 5500def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), 5501 (and QPR:$Vm, (vnotq QPR:$Vd)))), 5502 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; 5503} 5504 5505// VBIF : Vector Bitwise Insert if False 5506// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", 5507// FIXME: This instruction's encoding MAY NOT BE correct. 5508def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, 5509 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5510 N3RegFrm, IIC_VBINiD, 5511 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5512 []>; 5513def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, 5514 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5515 N3RegFrm, IIC_VBINiQ, 5516 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5517 []>; 5518 5519// VBIT : Vector Bitwise Insert if True 5520// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", 5521// FIXME: This instruction's encoding MAY NOT BE correct. 5522def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, 5523 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5524 N3RegFrm, IIC_VBINiD, 5525 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5526 []>; 5527def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, 5528 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5529 N3RegFrm, IIC_VBINiQ, 5530 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5531 []>; 5532 5533// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking 5534// for equivalent operations with different register constraints; it just 5535// inserts copies. 5536 5537// Vector Absolute Differences. 5538 5539// VABD : Vector Absolute Difference 5540defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, 5541 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5542 "vabd", "s", int_arm_neon_vabds, 1>; 5543defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, 5544 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5545 "vabd", "u", int_arm_neon_vabdu, 1>; 5546def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, 5547 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; 5548def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, 5549 "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; 5550def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND, 5551 "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>, 5552 Requires<[HasNEON, HasFullFP16]>; 5553def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ, 5554 "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>, 5555 Requires<[HasNEON, HasFullFP16]>; 5556 5557// VABDL : Vector Absolute Difference Long (Q = | D - D |) 5558defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, 5559 "vabdl", "s", int_arm_neon_vabds, zext, 1>; 5560defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, 5561 "vabdl", "u", int_arm_neon_vabdu, zext, 1>; 5562 5563let Predicates = [HasNEON] in { 5564def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))), 5565 (VABDLuv8i16 DPR:$opA, DPR:$opB)>; 5566def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))), 5567 (VABDLuv4i32 DPR:$opA, DPR:$opB)>; 5568} 5569 5570// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the 5571// shift/xor pattern for ABS. 5572 5573def abd_shr : 5574 PatFrag<(ops node:$in1, node:$in2, node:$shift), 5575 (ARMvshrsImm (sub (zext node:$in1), 5576 (zext node:$in2)), (i32 $shift))>; 5577 5578let Predicates = [HasNEON] in { 5579def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), 5580 (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), 5581 (zext (v2i32 DPR:$opB))), 5582 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), 5583 (VABDLuv2i64 DPR:$opA, DPR:$opB)>; 5584} 5585 5586// VABA : Vector Absolute Difference and Accumulate 5587defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5588 "vaba", "s", int_arm_neon_vabds, add>; 5589defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5590 "vaba", "u", int_arm_neon_vabdu, add>; 5591 5592// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) 5593defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, 5594 "vabal", "s", int_arm_neon_vabds, zext, add>; 5595defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, 5596 "vabal", "u", int_arm_neon_vabdu, zext, add>; 5597 5598// Vector Maximum and Minimum. 5599 5600// VMAX : Vector Maximum 5601defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, 5602 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5603 "vmax", "s", smax, 1>; 5604defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, 5605 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5606 "vmax", "u", umax, 1>; 5607def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, 5608 "vmax", "f32", 5609 v2f32, v2f32, fmaximum, 1>; 5610def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5611 "vmax", "f32", 5612 v4f32, v4f32, fmaximum, 1>; 5613def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND, 5614 "vmax", "f16", 5615 v4f16, v4f16, fmaximum, 1>, 5616 Requires<[HasNEON, HasFullFP16]>; 5617def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5618 "vmax", "f16", 5619 v8f16, v8f16, fmaximum, 1>, 5620 Requires<[HasNEON, HasFullFP16]>; 5621 5622// VMAXNM 5623let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 5624 def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, 5625 N3RegFrm, NoItinerary, "vmaxnm", "f32", 5626 v2f32, v2f32, fmaxnum, 1>, 5627 Requires<[HasV8, HasNEON]>; 5628 def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, 5629 N3RegFrm, NoItinerary, "vmaxnm", "f32", 5630 v4f32, v4f32, fmaxnum, 1>, 5631 Requires<[HasV8, HasNEON]>; 5632 def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1, 5633 N3RegFrm, NoItinerary, "vmaxnm", "f16", 5634 v4f16, v4f16, fmaxnum, 1>, 5635 Requires<[HasV8, HasNEON, HasFullFP16]>; 5636 def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1, 5637 N3RegFrm, NoItinerary, "vmaxnm", "f16", 5638 v8f16, v8f16, fmaxnum, 1>, 5639 Requires<[HasV8, HasNEON, HasFullFP16]>; 5640} 5641 5642// VMIN : Vector Minimum 5643defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, 5644 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5645 "vmin", "s", smin, 1>; 5646defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, 5647 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5648 "vmin", "u", umin, 1>; 5649def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, 5650 "vmin", "f32", 5651 v2f32, v2f32, fminimum, 1>; 5652def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5653 "vmin", "f32", 5654 v4f32, v4f32, fminimum, 1>; 5655def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND, 5656 "vmin", "f16", 5657 v4f16, v4f16, fminimum, 1>, 5658 Requires<[HasNEON, HasFullFP16]>; 5659def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5660 "vmin", "f16", 5661 v8f16, v8f16, fminimum, 1>, 5662 Requires<[HasNEON, HasFullFP16]>; 5663 5664// VMINNM 5665let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 5666 def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, 5667 N3RegFrm, NoItinerary, "vminnm", "f32", 5668 v2f32, v2f32, fminnum, 1>, 5669 Requires<[HasV8, HasNEON]>; 5670 def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, 5671 N3RegFrm, NoItinerary, "vminnm", "f32", 5672 v4f32, v4f32, fminnum, 1>, 5673 Requires<[HasV8, HasNEON]>; 5674 def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1, 5675 N3RegFrm, NoItinerary, "vminnm", "f16", 5676 v4f16, v4f16, fminnum, 1>, 5677 Requires<[HasV8, HasNEON, HasFullFP16]>; 5678 def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1, 5679 N3RegFrm, NoItinerary, "vminnm", "f16", 5680 v8f16, v8f16, fminnum, 1>, 5681 Requires<[HasV8, HasNEON, HasFullFP16]>; 5682} 5683 5684// Vector Pairwise Operations. 5685 5686// VPADD : Vector Pairwise Add 5687def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5688 "vpadd", "i8", 5689 v8i8, v8i8, int_arm_neon_vpadd, 0>; 5690def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5691 "vpadd", "i16", 5692 v4i16, v4i16, int_arm_neon_vpadd, 0>; 5693def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5694 "vpadd", "i32", 5695 v2i32, v2i32, int_arm_neon_vpadd, 0>; 5696def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, 5697 IIC_VPBIND, "vpadd", "f32", 5698 v2f32, v2f32, int_arm_neon_vpadd, 0>; 5699def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, 5700 IIC_VPBIND, "vpadd", "f16", 5701 v4f16, v4f16, int_arm_neon_vpadd, 0>, 5702 Requires<[HasNEON, HasFullFP16]>; 5703 5704// VPADDL : Vector Pairwise Add Long 5705defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", 5706 int_arm_neon_vpaddls>; 5707defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", 5708 int_arm_neon_vpaddlu>; 5709 5710// VPADAL : Vector Pairwise Add and Accumulate Long 5711defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", 5712 int_arm_neon_vpadals>; 5713defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", 5714 int_arm_neon_vpadalu>; 5715 5716// VPMAX : Vector Pairwise Maximum 5717def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5718 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; 5719def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5720 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; 5721def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5722 "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; 5723def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5724 "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; 5725def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5726 "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; 5727def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5728 "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; 5729def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", 5730 "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; 5731def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", 5732 "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>, 5733 Requires<[HasNEON, HasFullFP16]>; 5734 5735// VPMIN : Vector Pairwise Minimum 5736def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5737 "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; 5738def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5739 "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; 5740def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5741 "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; 5742def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5743 "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; 5744def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5745 "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; 5746def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5747 "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; 5748def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", 5749 "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; 5750def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", 5751 "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>, 5752 Requires<[HasNEON, HasFullFP16]>; 5753 5754// Vector Reciprocal and Reciprocal Square Root Estimate and Step. 5755 5756// VRECPE : Vector Reciprocal Estimate 5757def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 5758 IIC_VUNAD, "vrecpe", "u32", 5759 v2i32, v2i32, int_arm_neon_vrecpe>; 5760def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 5761 IIC_VUNAQ, "vrecpe", "u32", 5762 v4i32, v4i32, int_arm_neon_vrecpe>; 5763def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 5764 IIC_VUNAD, "vrecpe", "f32", 5765 v2f32, v2f32, int_arm_neon_vrecpe>; 5766def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 5767 IIC_VUNAQ, "vrecpe", "f32", 5768 v4f32, v4f32, int_arm_neon_vrecpe>; 5769def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, 5770 IIC_VUNAD, "vrecpe", "f16", 5771 v4f16, v4f16, int_arm_neon_vrecpe>, 5772 Requires<[HasNEON, HasFullFP16]>; 5773def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, 5774 IIC_VUNAQ, "vrecpe", "f16", 5775 v8f16, v8f16, int_arm_neon_vrecpe>, 5776 Requires<[HasNEON, HasFullFP16]>; 5777 5778// VRECPS : Vector Reciprocal Step 5779def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 5780 IIC_VRECSD, "vrecps", "f32", 5781 v2f32, v2f32, int_arm_neon_vrecps, 1>; 5782def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 5783 IIC_VRECSQ, "vrecps", "f32", 5784 v4f32, v4f32, int_arm_neon_vrecps, 1>; 5785def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, 5786 IIC_VRECSD, "vrecps", "f16", 5787 v4f16, v4f16, int_arm_neon_vrecps, 1>, 5788 Requires<[HasNEON, HasFullFP16]>; 5789def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, 5790 IIC_VRECSQ, "vrecps", "f16", 5791 v8f16, v8f16, int_arm_neon_vrecps, 1>, 5792 Requires<[HasNEON, HasFullFP16]>; 5793 5794// VRSQRTE : Vector Reciprocal Square Root Estimate 5795def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 5796 IIC_VUNAD, "vrsqrte", "u32", 5797 v2i32, v2i32, int_arm_neon_vrsqrte>; 5798def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 5799 IIC_VUNAQ, "vrsqrte", "u32", 5800 v4i32, v4i32, int_arm_neon_vrsqrte>; 5801def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 5802 IIC_VUNAD, "vrsqrte", "f32", 5803 v2f32, v2f32, int_arm_neon_vrsqrte>; 5804def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 5805 IIC_VUNAQ, "vrsqrte", "f32", 5806 v4f32, v4f32, int_arm_neon_vrsqrte>; 5807def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, 5808 IIC_VUNAD, "vrsqrte", "f16", 5809 v4f16, v4f16, int_arm_neon_vrsqrte>, 5810 Requires<[HasNEON, HasFullFP16]>; 5811def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, 5812 IIC_VUNAQ, "vrsqrte", "f16", 5813 v8f16, v8f16, int_arm_neon_vrsqrte>, 5814 Requires<[HasNEON, HasFullFP16]>; 5815 5816// VRSQRTS : Vector Reciprocal Square Root Step 5817def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 5818 IIC_VRECSD, "vrsqrts", "f32", 5819 v2f32, v2f32, int_arm_neon_vrsqrts, 1>; 5820def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 5821 IIC_VRECSQ, "vrsqrts", "f32", 5822 v4f32, v4f32, int_arm_neon_vrsqrts, 1>; 5823def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, 5824 IIC_VRECSD, "vrsqrts", "f16", 5825 v4f16, v4f16, int_arm_neon_vrsqrts, 1>, 5826 Requires<[HasNEON, HasFullFP16]>; 5827def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, 5828 IIC_VRECSQ, "vrsqrts", "f16", 5829 v8f16, v8f16, int_arm_neon_vrsqrts, 1>, 5830 Requires<[HasNEON, HasFullFP16]>; 5831 5832// Vector Shifts. 5833 5834// VSHL : Vector Shift 5835defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, 5836 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, 5837 "vshl", "s", int_arm_neon_vshifts>; 5838defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, 5839 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, 5840 "vshl", "u", int_arm_neon_vshiftu>; 5841 5842let Predicates = [HasNEON] in { 5843def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))), 5844 (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>; 5845def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))), 5846 (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>; 5847def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))), 5848 (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>; 5849def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))), 5850 (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>; 5851def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))), 5852 (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>; 5853def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))), 5854 (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>; 5855def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))), 5856 (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>; 5857def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))), 5858 (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>; 5859 5860def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))), 5861 (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>; 5862def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))), 5863 (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>; 5864def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))), 5865 (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>; 5866def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))), 5867 (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>; 5868def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))), 5869 (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>; 5870def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))), 5871 (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>; 5872def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))), 5873 (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>; 5874def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))), 5875 (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>; 5876 5877} 5878 5879// VSHL : Vector Shift Left (Immediate) 5880defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>; 5881 5882// VSHR : Vector Shift Right (Immediate) 5883defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", 5884 ARMvshrsImm>; 5885defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", 5886 ARMvshruImm>; 5887 5888// VSHLL : Vector Shift Left Long 5889defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", 5890 PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>; 5891defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", 5892 PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>; 5893 5894// VSHLL : Vector Shift Left Long (with maximum shift count) 5895class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, 5896 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, 5897 ValueType OpTy, Operand ImmTy> 5898 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, 5899 ResTy, OpTy, ImmTy, null_frag> { 5900 let Inst{21-16} = op21_16; 5901 let DecoderMethod = "DecodeVSHLMaxInstruction"; 5902} 5903def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", 5904 v8i16, v8i8, imm8>; 5905def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", 5906 v4i32, v4i16, imm16>; 5907def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", 5908 v2i64, v2i32, imm32>; 5909 5910let Predicates = [HasNEON] in { 5911def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))), 5912 (VSHLLi8 DPR:$Rn, 8)>; 5913def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))), 5914 (VSHLLi16 DPR:$Rn, 16)>; 5915def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))), 5916 (VSHLLi32 DPR:$Rn, 32)>; 5917def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))), 5918 (VSHLLi8 DPR:$Rn, 8)>; 5919def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))), 5920 (VSHLLi16 DPR:$Rn, 16)>; 5921def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))), 5922 (VSHLLi32 DPR:$Rn, 32)>; 5923def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))), 5924 (VSHLLi8 DPR:$Rn, 8)>; 5925def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))), 5926 (VSHLLi16 DPR:$Rn, 16)>; 5927def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))), 5928 (VSHLLi32 DPR:$Rn, 32)>; 5929} 5930 5931// VSHRN : Vector Shift Right and Narrow 5932defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", 5933 PatFrag<(ops node:$Rn, node:$amt), 5934 (trunc (ARMvshrsImm node:$Rn, node:$amt))>>; 5935 5936let Predicates = [HasNEON] in { 5937def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))), 5938 (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>; 5939def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))), 5940 (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>; 5941def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))), 5942 (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>; 5943} 5944 5945// VRSHL : Vector Rounding Shift 5946defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm, 5947 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5948 "vrshl", "s", int_arm_neon_vrshifts>; 5949defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, 5950 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5951 "vrshl", "u", int_arm_neon_vrshiftu>; 5952// VRSHR : Vector Rounding Shift Right 5953defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", 5954 NEONvrshrsImm>; 5955defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", 5956 NEONvrshruImm>; 5957 5958// VRSHRN : Vector Rounding Shift Right and Narrow 5959defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", 5960 NEONvrshrnImm>; 5961 5962// VQSHL : Vector Saturating Shift 5963defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm, 5964 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5965 "vqshl", "s", int_arm_neon_vqshifts>; 5966defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, 5967 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5968 "vqshl", "u", int_arm_neon_vqshiftu>; 5969// VQSHL : Vector Saturating Shift Left (Immediate) 5970defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>; 5971defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>; 5972 5973// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) 5974defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>; 5975 5976// VQSHRN : Vector Saturating Shift Right and Narrow 5977defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", 5978 NEONvqshrnsImm>; 5979defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", 5980 NEONvqshrnuImm>; 5981 5982// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) 5983defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", 5984 NEONvqshrnsuImm>; 5985 5986// VQRSHL : Vector Saturating Rounding Shift 5987defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm, 5988 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5989 "vqrshl", "s", int_arm_neon_vqrshifts>; 5990defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm, 5991 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5992 "vqrshl", "u", int_arm_neon_vqrshiftu>; 5993 5994// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow 5995defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", 5996 NEONvqrshrnsImm>; 5997defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", 5998 NEONvqrshrnuImm>; 5999 6000// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) 6001defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", 6002 NEONvqrshrnsuImm>; 6003 6004// VSRA : Vector Shift Right and Accumulate 6005defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>; 6006defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>; 6007// VRSRA : Vector Rounding Shift Right and Accumulate 6008defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>; 6009defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>; 6010 6011// VSLI : Vector Shift Left and Insert 6012defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; 6013 6014// VSRI : Vector Shift Right and Insert 6015defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; 6016 6017// Vector Absolute and Saturating Absolute. 6018 6019// VABS : Vector Absolute Value 6020defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 6021 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>; 6022def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 6023 "vabs", "f32", 6024 v2f32, v2f32, fabs>; 6025def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 6026 "vabs", "f32", 6027 v4f32, v4f32, fabs>; 6028def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 6029 "vabs", "f16", 6030 v4f16, v4f16, fabs>, 6031 Requires<[HasNEON, HasFullFP16]>; 6032def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 6033 "vabs", "f16", 6034 v8f16, v8f16, fabs>, 6035 Requires<[HasNEON, HasFullFP16]>; 6036 6037// VQABS : Vector Saturating Absolute Value 6038defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 6039 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", 6040 int_arm_neon_vqabs>; 6041 6042// Vector Negate. 6043 6044def vnegd : PatFrag<(ops node:$in), 6045 (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>; 6046def vnegq : PatFrag<(ops node:$in), 6047 (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>; 6048 6049class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> 6050 : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), 6051 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", 6052 [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>; 6053class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> 6054 : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm), 6055 IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "", 6056 [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>; 6057 6058// VNEG : Vector Negate (integer) 6059def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; 6060def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; 6061def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; 6062def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>; 6063def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; 6064def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; 6065 6066// VNEG : Vector Negate (floating-point) 6067def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, 6068 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, 6069 "vneg", "f32", "$Vd, $Vm", "", 6070 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>; 6071def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, 6072 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, 6073 "vneg", "f32", "$Vd, $Vm", "", 6074 [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>; 6075def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0, 6076 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, 6077 "vneg", "f16", "$Vd, $Vm", "", 6078 [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>, 6079 Requires<[HasNEON, HasFullFP16]>; 6080def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0, 6081 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, 6082 "vneg", "f16", "$Vd, $Vm", "", 6083 [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>, 6084 Requires<[HasNEON, HasFullFP16]>; 6085 6086let Predicates = [HasNEON] in { 6087def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>; 6088def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>; 6089def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>; 6090def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>; 6091def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>; 6092def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>; 6093} 6094 6095// VQNEG : Vector Saturating Negate 6096defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, 6097 IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", 6098 int_arm_neon_vqneg>; 6099 6100// Vector Bit Counting Operations. 6101 6102// VCLS : Vector Count Leading Sign Bits 6103defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, 6104 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", 6105 int_arm_neon_vcls>; 6106// VCLZ : Vector Count Leading Zeros 6107defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, 6108 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", 6109 ctlz>; 6110// VCNT : Vector Count One Bits 6111def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, 6112 IIC_VCNTiD, "vcnt", "8", 6113 v8i8, v8i8, ctpop>; 6114def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, 6115 IIC_VCNTiQ, "vcnt", "8", 6116 v16i8, v16i8, ctpop>; 6117 6118// Vector Swap 6119def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, 6120 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), 6121 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", 6122 []>; 6123def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, 6124 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), 6125 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", 6126 []>; 6127 6128// Vector Move Operations. 6129 6130// VMOV : Vector Move (Register) 6131def : NEONInstAlias<"vmov${p} $Vd, $Vm", 6132 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 6133def : NEONInstAlias<"vmov${p} $Vd, $Vm", 6134 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 6135 6136// VMOV : Vector Move (Immediate) 6137 6138// Although VMOVs are not strictly speaking cheap, they are as expensive 6139// as their copies counterpart (VORR), so we should prefer rematerialization 6140// over splitting when it applies. 6141let isReMaterializable = 1, isAsCheapAsAMove=1 in { 6142def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), 6143 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 6144 "vmov", "i8", "$Vd, $SIMM", "", 6145 [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>; 6146def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), 6147 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 6148 "vmov", "i8", "$Vd, $SIMM", "", 6149 [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>; 6150 6151def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), 6152 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 6153 "vmov", "i16", "$Vd, $SIMM", "", 6154 [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> { 6155 let Inst{9} = SIMM{9}; 6156} 6157 6158def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), 6159 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 6160 "vmov", "i16", "$Vd, $SIMM", "", 6161 [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> { 6162 let Inst{9} = SIMM{9}; 6163} 6164 6165def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), 6166 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 6167 "vmov", "i32", "$Vd, $SIMM", "", 6168 [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> { 6169 let Inst{11-8} = SIMM{11-8}; 6170} 6171 6172def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), 6173 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 6174 "vmov", "i32", "$Vd, $SIMM", "", 6175 [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> { 6176 let Inst{11-8} = SIMM{11-8}; 6177} 6178 6179def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), 6180 (ins nImmSplatI64:$SIMM), IIC_VMOVImm, 6181 "vmov", "i64", "$Vd, $SIMM", "", 6182 [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>; 6183def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), 6184 (ins nImmSplatI64:$SIMM), IIC_VMOVImm, 6185 "vmov", "i64", "$Vd, $SIMM", "", 6186 [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>; 6187 6188def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), 6189 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, 6190 "vmov", "f32", "$Vd, $SIMM", "", 6191 [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>; 6192def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), 6193 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, 6194 "vmov", "f32", "$Vd, $SIMM", "", 6195 [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>; 6196} // isReMaterializable, isAsCheapAsAMove 6197 6198// Add support for bytes replication feature, so it could be GAS compatible. 6199multiclass NEONImmReplicateI8InstAlias<ValueType To> { 6200 // E.g. instructions below: 6201 // "vmov.i32 d0, #0xffffffff" 6202 // "vmov.i32 d0, #0xabababab" 6203 // "vmov.i16 d0, #0xabab" 6204 // are incorrect, but we could deal with such cases. 6205 // For last two instructions, for example, it should emit: 6206 // "vmov.i8 d0, #0xab" 6207 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6208 (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>; 6209 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6210 (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>; 6211 // Also add same support for VMVN instructions. So instruction: 6212 // "vmvn.i32 d0, #0xabababab" 6213 // actually means: 6214 // "vmov.i8 d0, #0x54" 6215 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6216 (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>; 6217 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6218 (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>; 6219} 6220 6221defm : NEONImmReplicateI8InstAlias<i16>; 6222defm : NEONImmReplicateI8InstAlias<i32>; 6223defm : NEONImmReplicateI8InstAlias<i64>; 6224 6225// Similar to above for types other than i8, e.g.: 6226// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00" 6227// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000" 6228// In this case we do not canonicalize VMVN to VMOV 6229multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16, 6230 NeonI NV8, NeonI NV16, ValueType To> { 6231 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6232 (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6233 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", 6234 (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6235 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6236 (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6237 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", 6238 (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>; 6239} 6240 6241defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16, 6242 VMVNv4i16, VMVNv8i16, i32>; 6243defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16, 6244 VMVNv4i16, VMVNv8i16, i64>; 6245defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32, 6246 VMVNv2i32, VMVNv4i32, i64>; 6247// TODO: add "VMOV <-> VMVN" conversion for cases like 6248// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55" 6249// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00" 6250 6251// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" 6252// require zero cycles to execute so they should be used wherever possible for 6253// setting a register to zero. 6254 6255// Even without these pseudo-insts we would probably end up with the correct 6256// instruction, but we could not mark the general ones with "isAsCheapAsAMove" 6257// since they are sometimes rather expensive (in general). 6258 6259let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { 6260 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, 6261 [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))], 6262 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, 6263 Requires<[HasZCZ]>; 6264 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, 6265 [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))], 6266 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, 6267 Requires<[HasZCZ]>; 6268} 6269 6270// VMOV : Vector Get Lane (move scalar to ARM core register) 6271 6272def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, 6273 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), 6274 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane", 6275 [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V), 6276 imm:$lane))]> { 6277 let Inst{21} = lane{2}; 6278 let Inst{6-5} = lane{1-0}; 6279} 6280def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, 6281 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), 6282 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane", 6283 [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V), 6284 imm:$lane))]> { 6285 let Inst{21} = lane{1}; 6286 let Inst{6} = lane{0}; 6287} 6288def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, 6289 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), 6290 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane", 6291 [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V), 6292 imm:$lane))]> { 6293 let Inst{21} = lane{2}; 6294 let Inst{6-5} = lane{1-0}; 6295} 6296def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, 6297 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), 6298 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane", 6299 [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V), 6300 imm:$lane))]> { 6301 let Inst{21} = lane{1}; 6302 let Inst{6} = lane{0}; 6303} 6304def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, 6305 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), 6306 IIC_VMOVSI, "vmov", "32", "$R, $V$lane", 6307 [(set GPR:$R, (extractelt (v2i32 DPR:$V), 6308 imm:$lane))]>, 6309 Requires<[HasFPRegs, HasFastVGETLNi32]> { 6310 let Inst{21} = lane{0}; 6311} 6312let Predicates = [HasNEON] in { 6313// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td 6314def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane), 6315 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src, 6316 (DSubReg_i8_reg imm:$lane))), 6317 (SubReg_i8_lane imm:$lane))>; 6318def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane), 6319 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, 6320 (DSubReg_i16_reg imm:$lane))), 6321 (SubReg_i16_lane imm:$lane))>; 6322def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane), 6323 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, 6324 (DSubReg_i8_reg imm:$lane))), 6325 (SubReg_i8_lane imm:$lane))>; 6326def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane), 6327 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, 6328 (DSubReg_i16_reg imm:$lane))), 6329 (SubReg_i16_lane imm:$lane))>; 6330} 6331def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), 6332 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, 6333 (DSubReg_i32_reg imm:$lane))), 6334 (SubReg_i32_lane imm:$lane))>, 6335 Requires<[HasNEON, HasFastVGETLNi32]>; 6336def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), 6337 (COPY_TO_REGCLASS 6338 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, 6339 Requires<[HasNEON, HasSlowVGETLNi32]>; 6340def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), 6341 (COPY_TO_REGCLASS 6342 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, 6343 Requires<[HasNEON, HasSlowVGETLNi32]>; 6344let Predicates = [HasNEON] in { 6345def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), 6346 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), 6347 (SSubReg_f32_reg imm:$src2))>; 6348def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), 6349 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), 6350 (SSubReg_f32_reg imm:$src2))>; 6351//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), 6352// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; 6353def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), 6354 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; 6355} 6356 6357def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>; 6358def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>; 6359 6360let Predicates = [HasNEON] in { 6361def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane), 6362 (EXTRACT_SUBREG 6363 (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)), 6364 (SSubReg_f16_reg imm_even:$lane))>; 6365 6366def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane), 6367 (COPY_TO_REGCLASS 6368 (VMOVH (EXTRACT_SUBREG 6369 (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)), 6370 (SSubReg_f16_reg imm_odd:$lane))), 6371 HPR)>; 6372 6373def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane), 6374 (EXTRACT_SUBREG 6375 (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)), 6376 (SSubReg_f16_reg imm_even:$lane))>; 6377 6378def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane), 6379 (COPY_TO_REGCLASS 6380 (VMOVH (EXTRACT_SUBREG 6381 (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)), 6382 (SSubReg_f16_reg imm_odd:$lane))), 6383 HPR)>; 6384} 6385 6386// VMOV : Vector Set Lane (move ARM core register to scalar) 6387 6388let Constraints = "$src1 = $V" in { 6389def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), 6390 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane), 6391 IIC_VMOVISL, "vmov", "8", "$V$lane, $R", 6392 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), 6393 GPR:$R, imm:$lane))]> { 6394 let Inst{21} = lane{2}; 6395 let Inst{6-5} = lane{1-0}; 6396} 6397def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), 6398 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane), 6399 IIC_VMOVISL, "vmov", "16", "$V$lane, $R", 6400 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), 6401 GPR:$R, imm:$lane))]> { 6402 let Inst{21} = lane{1}; 6403 let Inst{6} = lane{0}; 6404} 6405def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), 6406 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), 6407 IIC_VMOVISL, "vmov", "32", "$V$lane, $R", 6408 [(set DPR:$V, (insertelt (v2i32 DPR:$src1), 6409 GPR:$R, imm:$lane))]>, 6410 Requires<[HasVFP2]> { 6411 let Inst{21} = lane{0}; 6412 // This instruction is equivalent as 6413 // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) 6414 let isInsertSubreg = 1; 6415} 6416} 6417 6418let Predicates = [HasNEON] in { 6419def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), 6420 (v16i8 (INSERT_SUBREG QPR:$src1, 6421 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, 6422 (DSubReg_i8_reg imm:$lane))), 6423 GPR:$src2, (SubReg_i8_lane imm:$lane))), 6424 (DSubReg_i8_reg imm:$lane)))>; 6425def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), 6426 (v8i16 (INSERT_SUBREG QPR:$src1, 6427 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, 6428 (DSubReg_i16_reg imm:$lane))), 6429 GPR:$src2, (SubReg_i16_lane imm:$lane))), 6430 (DSubReg_i16_reg imm:$lane)))>; 6431def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), 6432 (v4i32 (INSERT_SUBREG QPR:$src1, 6433 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, 6434 (DSubReg_i32_reg imm:$lane))), 6435 GPR:$src2, (SubReg_i32_lane imm:$lane))), 6436 (DSubReg_i32_reg imm:$lane)))>; 6437 6438def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), 6439 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), 6440 SPR:$src2, (SSubReg_f32_reg imm:$src3))>; 6441def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)), 6442 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)), 6443 SPR:$src2, (SSubReg_f32_reg imm:$src3))>; 6444 6445def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane), 6446 (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>; 6447def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane), 6448 (v8f16 (INSERT_SUBREG QPR:$src1, 6449 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, 6450 (DSubReg_i16_reg imm:$lane))), 6451 (VMOVRH $src2), (SubReg_i16_lane imm:$lane))), 6452 (DSubReg_i16_reg imm:$lane)))>; 6453 6454//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), 6455// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; 6456def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), 6457 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; 6458 6459def : Pat<(v2f32 (scalar_to_vector SPR:$src)), 6460 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; 6461def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), 6462 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 6463def : Pat<(v4f32 (scalar_to_vector SPR:$src)), 6464 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; 6465 6466def : Pat<(v8i8 (scalar_to_vector GPR:$src)), 6467 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 6468def : Pat<(v4i16 (scalar_to_vector GPR:$src)), 6469 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 6470def : Pat<(v2i32 (scalar_to_vector GPR:$src)), 6471 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 6472 6473def : Pat<(v16i8 (scalar_to_vector GPR:$src)), 6474 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6475 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 6476 dsub_0)>; 6477def : Pat<(v8i16 (scalar_to_vector GPR:$src)), 6478 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 6479 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 6480 dsub_0)>; 6481def : Pat<(v4i32 (scalar_to_vector GPR:$src)), 6482 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 6483 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 6484 dsub_0)>; 6485} 6486 6487// VDUP : Vector Duplicate (from ARM core register to all elements) 6488 6489class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> 6490 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R), 6491 IIC_VMOVIS, "vdup", Dt, "$V, $R", 6492 [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>; 6493class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> 6494 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R), 6495 IIC_VMOVIS, "vdup", Dt, "$V, $R", 6496 [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>; 6497 6498def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; 6499def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; 6500def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, 6501 Requires<[HasNEON, HasFastVDUP32]>; 6502def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; 6503def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; 6504def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; 6505 6506// ARMvdup patterns for uarchs with fast VDUP.32. 6507def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, 6508 Requires<[HasNEON,HasFastVDUP32]>; 6509def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>, 6510 Requires<[HasNEON]>; 6511 6512// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. 6513def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, 6514 Requires<[HasNEON,HasSlowVDUP32]>; 6515def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, 6516 Requires<[HasNEON,HasSlowVDUP32]>; 6517 6518// VDUP : Vector Duplicate Lane (from scalar to all elements) 6519 6520class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, 6521 ValueType Ty, Operand IdxTy> 6522 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 6523 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", 6524 [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>; 6525 6526class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, 6527 ValueType ResTy, ValueType OpTy, Operand IdxTy> 6528 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 6529 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", 6530 [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm), 6531 VectorIndex32:$lane)))]>; 6532 6533// Inst{19-16} is partially specified depending on the element size. 6534 6535def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { 6536 bits<3> lane; 6537 let Inst{19-17} = lane{2-0}; 6538} 6539def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { 6540 bits<2> lane; 6541 let Inst{19-18} = lane{1-0}; 6542} 6543def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { 6544 bits<1> lane; 6545 let Inst{19} = lane{0}; 6546} 6547def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { 6548 bits<3> lane; 6549 let Inst{19-17} = lane{2-0}; 6550} 6551def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { 6552 bits<2> lane; 6553 let Inst{19-18} = lane{1-0}; 6554} 6555def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { 6556 bits<1> lane; 6557 let Inst{19} = lane{0}; 6558} 6559 6560let Predicates = [HasNEON] in { 6561def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)), 6562 (VDUPLN32d DPR:$Vm, imm:$lane)>; 6563 6564def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)), 6565 (VDUPLN32d DPR:$Vm, imm:$lane)>; 6566 6567def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)), 6568 (VDUPLN32q DPR:$Vm, imm:$lane)>; 6569 6570def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)), 6571 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, 6572 (DSubReg_i8_reg imm:$lane))), 6573 (SubReg_i8_lane imm:$lane)))>; 6574def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)), 6575 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, 6576 (DSubReg_i16_reg imm:$lane))), 6577 (SubReg_i16_lane imm:$lane)))>; 6578def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)), 6579 (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src, 6580 (DSubReg_i16_reg imm:$lane))), 6581 (SubReg_i16_lane imm:$lane)))>; 6582def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)), 6583 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, 6584 (DSubReg_i32_reg imm:$lane))), 6585 (SubReg_i32_lane imm:$lane)))>; 6586def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)), 6587 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, 6588 (DSubReg_i32_reg imm:$lane))), 6589 (SubReg_i32_lane imm:$lane)))>; 6590 6591def : Pat<(v4f16 (ARMvdup HPR:$src)), 6592 (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), 6593 HPR:$src, ssub_0), (i32 0)))>; 6594def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))), 6595 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6596 SPR:$src, ssub_0), (i32 0)))>; 6597def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))), 6598 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6599 SPR:$src, ssub_0), (i32 0)))>; 6600def : Pat<(v8f16 (ARMvdup HPR:$src)), 6601 (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), 6602 HPR:$src, ssub_0), (i32 0)))>; 6603} 6604 6605// VMOVN : Vector Narrowing Move 6606defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, 6607 "vmovn", "i", trunc>; 6608// VQMOVN : Vector Saturating Narrowing Move 6609defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, 6610 "vqmovn", "s", int_arm_neon_vqmovns>; 6611defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, 6612 "vqmovn", "u", int_arm_neon_vqmovnu>; 6613defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, 6614 "vqmovun", "s", int_arm_neon_vqmovnsu>; 6615// VMOVL : Vector Lengthening Move 6616defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; 6617defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; 6618 6619let Predicates = [HasNEON] in { 6620def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; 6621def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; 6622def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; 6623} 6624 6625// Vector Conversions. 6626 6627// VCVT : Vector Convert Between Floating-Point and Integers 6628def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6629 v2i32, v2f32, fp_to_sint>; 6630def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6631 v2i32, v2f32, fp_to_uint>; 6632def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6633 v2f32, v2i32, sint_to_fp>; 6634def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6635 v2f32, v2i32, uint_to_fp>; 6636 6637def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6638 v4i32, v4f32, fp_to_sint>; 6639def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6640 v4i32, v4f32, fp_to_uint>; 6641def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6642 v4f32, v4i32, sint_to_fp>; 6643def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6644 v4f32, v4i32, uint_to_fp>; 6645 6646def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6647 v4i16, v4f16, fp_to_sint>, 6648 Requires<[HasNEON, HasFullFP16]>; 6649def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6650 v4i16, v4f16, fp_to_uint>, 6651 Requires<[HasNEON, HasFullFP16]>; 6652def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6653 v4f16, v4i16, sint_to_fp>, 6654 Requires<[HasNEON, HasFullFP16]>; 6655def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6656 v4f16, v4i16, uint_to_fp>, 6657 Requires<[HasNEON, HasFullFP16]>; 6658 6659def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6660 v8i16, v8f16, fp_to_sint>, 6661 Requires<[HasNEON, HasFullFP16]>; 6662def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6663 v8i16, v8f16, fp_to_uint>, 6664 Requires<[HasNEON, HasFullFP16]>; 6665def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6666 v8f16, v8i16, sint_to_fp>, 6667 Requires<[HasNEON, HasFullFP16]>; 6668def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6669 v8f16, v8i16, uint_to_fp>, 6670 Requires<[HasNEON, HasFullFP16]>; 6671 6672// VCVT{A, N, P, M} 6673multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, 6674 SDPatternOperator IntU> { 6675 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 6676 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6677 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; 6678 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6679 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; 6680 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6681 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; 6682 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6683 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; 6684 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6685 "s16.f16", v4i16, v4f16, IntS>, 6686 Requires<[HasV8, HasNEON, HasFullFP16]>; 6687 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6688 "s16.f16", v8i16, v8f16, IntS>, 6689 Requires<[HasV8, HasNEON, HasFullFP16]>; 6690 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6691 "u16.f16", v4i16, v4f16, IntU>, 6692 Requires<[HasV8, HasNEON, HasFullFP16]>; 6693 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6694 "u16.f16", v8i16, v8f16, IntU>, 6695 Requires<[HasV8, HasNEON, HasFullFP16]>; 6696 } 6697} 6698 6699defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; 6700defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; 6701defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; 6702defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; 6703 6704// VCVT : Vector Convert Between Floating-Point and Fixed-Point. 6705let DecoderMethod = "DecodeVCVTD" in { 6706def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6707 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; 6708def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6709 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; 6710def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6711 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; 6712def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 6713 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; 6714let Predicates = [HasNEON, HasFullFP16] in { 6715def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6716 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>; 6717def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6718 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>; 6719def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6720 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>; 6721def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6722 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>; 6723} // Predicates = [HasNEON, HasFullFP16] 6724} 6725 6726let DecoderMethod = "DecodeVCVTQ" in { 6727def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6728 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; 6729def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6730 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; 6731def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6732 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; 6733def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 6734 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; 6735let Predicates = [HasNEON, HasFullFP16] in { 6736def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6737 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>; 6738def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6739 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>; 6740def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6741 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>; 6742def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6743 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>; 6744} // Predicates = [HasNEON, HasFullFP16] 6745} 6746 6747def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", 6748 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6749def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", 6750 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6751def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", 6752 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6753def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", 6754 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6755 6756def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", 6757 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6758def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", 6759 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6760def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", 6761 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6762def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", 6763 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6764 6765def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0", 6766 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6767def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0", 6768 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6769def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0", 6770 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6771def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0", 6772 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6773 6774def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0", 6775 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6776def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", 6777 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6778def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0", 6779 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6780def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0", 6781 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6782 6783 6784// VCVT : Vector Convert Between Half-Precision and Single-Precision. 6785def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 6786 IIC_VUNAQ, "vcvt", "f16.f32", 6787 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 6788 Requires<[HasNEON, HasFP16]>; 6789def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 6790 IIC_VUNAQ, "vcvt", "f32.f16", 6791 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 6792 Requires<[HasNEON, HasFP16]>; 6793 6794// Vector Reverse. 6795 6796// VREV64 : Vector Reverse elements within 64-bit doublewords 6797 6798class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6799 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), 6800 (ins DPR:$Vm), IIC_VMOVD, 6801 OpcodeStr, Dt, "$Vd, $Vm", "", 6802 [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>; 6803class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6804 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), 6805 (ins QPR:$Vm), IIC_VMOVQ, 6806 OpcodeStr, Dt, "$Vd, $Vm", "", 6807 [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>; 6808 6809def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; 6810def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; 6811def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; 6812let Predicates = [HasNEON] in { 6813def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; 6814} 6815 6816def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; 6817def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; 6818def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; 6819 6820let Predicates = [HasNEON] in { 6821 def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), 6822 (VREV64q32 QPR:$Vm)>; 6823 def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), 6824 (VREV64q16 QPR:$Vm)>; 6825 def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), 6826 (VREV64d16 DPR:$Vm)>; 6827} 6828 6829// VREV32 : Vector Reverse elements within 32-bit words 6830 6831class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6832 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), 6833 (ins DPR:$Vm), IIC_VMOVD, 6834 OpcodeStr, Dt, "$Vd, $Vm", "", 6835 [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>; 6836class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6837 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), 6838 (ins QPR:$Vm), IIC_VMOVQ, 6839 OpcodeStr, Dt, "$Vd, $Vm", "", 6840 [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>; 6841 6842def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; 6843def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; 6844 6845def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; 6846def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; 6847 6848let Predicates = [HasNEON] in { 6849 def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))), 6850 (VREV32q16 QPR:$Vm)>; 6851 def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))), 6852 (VREV32d16 DPR:$Vm)>; 6853} 6854 6855// VREV16 : Vector Reverse elements within 16-bit halfwords 6856 6857class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6858 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd), 6859 (ins DPR:$Vm), IIC_VMOVD, 6860 OpcodeStr, Dt, "$Vd, $Vm", "", 6861 [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>; 6862class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6863 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd), 6864 (ins QPR:$Vm), IIC_VMOVQ, 6865 OpcodeStr, Dt, "$Vd, $Vm", "", 6866 [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>; 6867 6868def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; 6869def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; 6870 6871// Other Vector Shuffles. 6872 6873// Aligned extractions: really just dropping registers 6874 6875class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT> 6876 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))), 6877 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>, 6878 Requires<[HasNEON]>; 6879 6880def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>; 6881 6882def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>; 6883 6884def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>; 6885 6886def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>; 6887 6888def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; 6889 6890def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16 6891 6892// VEXT : Vector Extract 6893 6894 6895// All of these have a two-operand InstAlias. 6896let TwoOperandAliasConstraint = "$Vn = $Vd" in { 6897class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> 6898 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), 6899 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, 6900 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", 6901 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), 6902 (Ty DPR:$Vm), imm:$index)))]> { 6903 bits<3> index; 6904 let Inst{11} = 0b0; 6905 let Inst{10-8} = index{2-0}; 6906} 6907 6908class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> 6909 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), 6910 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm, 6911 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", 6912 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), 6913 (Ty QPR:$Vm), imm:$index)))]> { 6914 bits<4> index; 6915 let Inst{11-8} = index{3-0}; 6916} 6917} 6918 6919def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { 6920 let Inst{10-8} = index{2-0}; 6921} 6922def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { 6923 let Inst{10-9} = index{1-0}; 6924 let Inst{8} = 0b0; 6925} 6926let Predicates = [HasNEON] in { 6927def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))), 6928 (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>; 6929} 6930 6931def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { 6932 let Inst{10} = index{0}; 6933 let Inst{9-8} = 0b00; 6934} 6935let Predicates = [HasNEON] in { 6936def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))), 6937 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; 6938} 6939 6940def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { 6941 let Inst{11-8} = index{3-0}; 6942} 6943def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { 6944 let Inst{11-9} = index{2-0}; 6945 let Inst{8} = 0b0; 6946} 6947let Predicates = [HasNEON] in { 6948def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))), 6949 (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>; 6950} 6951 6952def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { 6953 let Inst{11-10} = index{1-0}; 6954 let Inst{9-8} = 0b00; 6955} 6956def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { 6957 let Inst{11} = index{0}; 6958 let Inst{10-8} = 0b000; 6959} 6960let Predicates = [HasNEON] in { 6961def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))), 6962 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>; 6963} 6964 6965// VTRN : Vector Transpose 6966 6967def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">; 6968def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">; 6969def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">; 6970 6971def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">; 6972def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">; 6973def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; 6974 6975// VUZP : Vector Unzip (Deinterleave) 6976 6977def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; 6978def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; 6979// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 6980def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", 6981 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; 6982 6983def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; 6984def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; 6985def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; 6986 6987// VZIP : Vector Zip (Interleave) 6988 6989def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; 6990def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; 6991// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 6992def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", 6993 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; 6994 6995def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; 6996def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; 6997def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; 6998 6999// Vector Table Lookup and Table Extension. 7000 7001// VTBL : Vector Table Lookup 7002let DecoderMethod = "DecodeTBLInstruction" in { 7003def VTBL1 7004 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), 7005 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, 7006 "vtbl", "8", "$Vd, $Vn, $Vm", "", 7007 [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; 7008 7009let hasExtraSrcRegAllocReq = 1 in { 7010def VTBL2 7011 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), 7012 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, 7013 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; 7014def VTBL3 7015 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), 7016 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, 7017 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; 7018def VTBL4 7019 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), 7020 (ins VecListFourD:$Vn, DPR:$Vm), 7021 NVTBLFrm, IIC_VTB4, 7022 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; 7023} // hasExtraSrcRegAllocReq = 1 7024 7025def VTBL3Pseudo 7026 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; 7027def VTBL4Pseudo 7028 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>; 7029 7030// VTBX : Vector Table Extension 7031def VTBX1 7032 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), 7033 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, 7034 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", 7035 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 7036 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>; 7037let hasExtraSrcRegAllocReq = 1 in { 7038def VTBX2 7039 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), 7040 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, 7041 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; 7042def VTBX3 7043 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), 7044 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), 7045 NVTBLFrm, IIC_VTBX3, 7046 "vtbx", "8", "$Vd, $Vn, $Vm", 7047 "$orig = $Vd", []>; 7048def VTBX4 7049 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), 7050 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, 7051 "vtbx", "8", "$Vd, $Vn, $Vm", 7052 "$orig = $Vd", []>; 7053} // hasExtraSrcRegAllocReq = 1 7054 7055def VTBX3Pseudo 7056 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), 7057 IIC_VTBX3, "$orig = $dst", []>; 7058def VTBX4Pseudo 7059 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), 7060 IIC_VTBX4, "$orig = $dst", []>; 7061} // DecoderMethod = "DecodeTBLInstruction" 7062 7063let Predicates = [HasNEON] in { 7064def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)), 7065 (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, 7066 v8i8:$Vn1, dsub_1), 7067 v8i8:$Vm))>; 7068def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, 7069 v8i8:$Vm)), 7070 (v8i8 (VTBX2 v8i8:$orig, 7071 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, 7072 v8i8:$Vn1, dsub_1), 7073 v8i8:$Vm))>; 7074 7075def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1, 7076 v8i8:$Vn2, v8i8:$Vm)), 7077 (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, 7078 v8i8:$Vn1, dsub_1, 7079 v8i8:$Vn2, dsub_2, 7080 (v8i8 (IMPLICIT_DEF)), dsub_3), 7081 v8i8:$Vm))>; 7082def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, 7083 v8i8:$Vn2, v8i8:$Vm)), 7084 (v8i8 (VTBX3Pseudo v8i8:$orig, 7085 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, 7086 v8i8:$Vn1, dsub_1, 7087 v8i8:$Vn2, dsub_2, 7088 (v8i8 (IMPLICIT_DEF)), dsub_3), 7089 v8i8:$Vm))>; 7090 7091def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1, 7092 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), 7093 (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, 7094 v8i8:$Vn1, dsub_1, 7095 v8i8:$Vn2, dsub_2, 7096 v8i8:$Vn3, dsub_3), 7097 v8i8:$Vm))>; 7098def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, 7099 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), 7100 (v8i8 (VTBX4Pseudo v8i8:$orig, 7101 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, 7102 v8i8:$Vn1, dsub_1, 7103 v8i8:$Vn2, dsub_2, 7104 v8i8:$Vn3, dsub_3), 7105 v8i8:$Vm))>; 7106} 7107 7108// VRINT : Vector Rounding 7109multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { 7110 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 7111 def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary, 7112 !strconcat("vrint", op), "f32", 7113 v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { 7114 let Inst{9-7} = op9_7; 7115 } 7116 def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary, 7117 !strconcat("vrint", op), "f32", 7118 v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { 7119 let Inst{9-7} = op9_7; 7120 } 7121 def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary, 7122 !strconcat("vrint", op), "f16", 7123 v4f16, v4f16, Int>, 7124 Requires<[HasV8, HasNEON, HasFullFP16]> { 7125 let Inst{9-7} = op9_7; 7126 } 7127 def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary, 7128 !strconcat("vrint", op), "f16", 7129 v8f16, v8f16, Int>, 7130 Requires<[HasV8, HasNEON, HasFullFP16]> { 7131 let Inst{9-7} = op9_7; 7132 } 7133 } 7134 7135 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"), 7136 (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>; 7137 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"), 7138 (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>; 7139 let Predicates = [HasNEON, HasFullFP16] in { 7140 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"), 7141 (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>; 7142 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"), 7143 (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>; 7144 } 7145} 7146 7147defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; 7148defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; 7149defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; 7150defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; 7151defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; 7152defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; 7153 7154// Cryptography instructions 7155let PostEncoderMethod = "NEONThumb2DataIPostEncoder", 7156 DecoderNamespace = "v8Crypto", hasSideEffects = 0 in { 7157 class AES<string op, bit op7, bit op6, SDPatternOperator Int> 7158 : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, 7159 !strconcat("aes", op), "8", v16i8, v16i8, Int>, 7160 Requires<[HasV8, HasCrypto]>; 7161 class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int> 7162 : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, 7163 !strconcat("aes", op), "8", v16i8, v16i8, Int>, 7164 Requires<[HasV8, HasCrypto]>; 7165 class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, 7166 SDPatternOperator Int> 7167 : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, 7168 !strconcat("sha", op), "32", v4i32, v4i32, Int>, 7169 Requires<[HasV8, HasCrypto]>; 7170 class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, 7171 SDPatternOperator Int> 7172 : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, 7173 !strconcat("sha", op), "32", v4i32, v4i32, Int>, 7174 Requires<[HasV8, HasCrypto]>; 7175 class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int> 7176 : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary, 7177 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>, 7178 Requires<[HasV8, HasCrypto]>; 7179} 7180 7181def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; 7182def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; 7183def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; 7184def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; 7185 7186def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>; 7187def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; 7188def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; 7189def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>; 7190def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>; 7191def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>; 7192def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; 7193def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; 7194def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; 7195def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; 7196 7197let Predicates = [HasNEON] in { 7198def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), 7199 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG 7200 (SHA1H (SUBREG_TO_REG (i64 0), 7201 (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)), 7202 ssub_0)), 7203 ssub_0)), GPR)>; 7204 7205def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 7206 (SHA1C v4i32:$hash_abcd, 7207 (SUBREG_TO_REG (i64 0), 7208 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 7209 ssub_0), 7210 v4i32:$wk)>; 7211 7212def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 7213 (SHA1M v4i32:$hash_abcd, 7214 (SUBREG_TO_REG (i64 0), 7215 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 7216 ssub_0), 7217 v4i32:$wk)>; 7218 7219def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 7220 (SHA1P v4i32:$hash_abcd, 7221 (SUBREG_TO_REG (i64 0), 7222 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 7223 ssub_0), 7224 v4i32:$wk)>; 7225} 7226 7227//===----------------------------------------------------------------------===// 7228// NEON instructions for single-precision FP math 7229//===----------------------------------------------------------------------===// 7230 7231class N2VSPat<SDNode OpNode, NeonI Inst> 7232 : NEONFPPat<(f32 (OpNode SPR:$a)), 7233 (EXTRACT_SUBREG 7234 (v2f32 (COPY_TO_REGCLASS (Inst 7235 (INSERT_SUBREG 7236 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 7237 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>; 7238 7239class N3VSPat<SDNode OpNode, NeonI Inst> 7240 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), 7241 (EXTRACT_SUBREG 7242 (v2f32 (COPY_TO_REGCLASS (Inst 7243 (INSERT_SUBREG 7244 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 7245 SPR:$a, ssub_0), 7246 (INSERT_SUBREG 7247 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 7248 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 7249 7250class N3VSPatFP16<SDNode OpNode, NeonI Inst> 7251 : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)), 7252 (EXTRACT_SUBREG 7253 (v4f16 (COPY_TO_REGCLASS (Inst 7254 (INSERT_SUBREG 7255 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)), 7256 HPR:$a, ssub_0), 7257 (INSERT_SUBREG 7258 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)), 7259 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 7260 7261class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> 7262 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), 7263 (EXTRACT_SUBREG 7264 (v2f32 (COPY_TO_REGCLASS (Inst 7265 (INSERT_SUBREG 7266 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 7267 SPR:$acc, ssub_0), 7268 (INSERT_SUBREG 7269 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 7270 SPR:$a, ssub_0), 7271 (INSERT_SUBREG 7272 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 7273 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 7274 7275class NVCVTIFPat<SDNode OpNode, NeonI Inst> 7276 : NEONFPPat<(f32 (OpNode GPR:$a)), 7277 (f32 (EXTRACT_SUBREG 7278 (v2f32 (Inst 7279 (INSERT_SUBREG 7280 (v2f32 (IMPLICIT_DEF)), 7281 (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))), 7282 ssub_0))>; 7283class NVCVTFIPat<SDNode OpNode, NeonI Inst> 7284 : NEONFPPat<(i32 (OpNode SPR:$a)), 7285 (i32 (EXTRACT_SUBREG 7286 (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 7287 SPR:$a, ssub_0))), 7288 ssub_0))>; 7289 7290def : N3VSPat<fadd, VADDfd>; 7291def : N3VSPat<fsub, VSUBfd>; 7292def : N3VSPat<fmul, VMULfd>; 7293def : N3VSMulOpPat<fmul, fadd, VMLAfd>, 7294 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; 7295def : N3VSMulOpPat<fmul, fsub, VMLSfd>, 7296 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; 7297def : N3VSMulOpPat<fmul, fadd, VFMAfd>, 7298 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 7299def : N3VSMulOpPat<fmul, fsub, VFMSfd>, 7300 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 7301def : N2VSPat<fabs, VABSfd>; 7302def : N2VSPat<fneg, VNEGfd>; 7303def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>; 7304def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>; 7305def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>; 7306def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>; 7307def : NVCVTFIPat<fp_to_sint, VCVTf2sd>; 7308def : NVCVTFIPat<fp_to_uint, VCVTf2ud>; 7309def : NVCVTIFPat<sint_to_fp, VCVTs2fd>; 7310def : NVCVTIFPat<uint_to_fp, VCVTu2fd>; 7311 7312// NEON doesn't have any f64 conversions, so provide patterns to make 7313// sure the VFP conversions match when extracting from a vector. 7314def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 7315 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7316def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 7317 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7318def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 7319 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7320def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 7321 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7322 7323 7324// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 7325def : Pat<(f32 (bitconvert GPR:$a)), 7326 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 7327 Requires<[HasNEON, DontUseVMOVSR]>; 7328def : Pat<(arm_vmovsr GPR:$a), 7329 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 7330 Requires<[HasNEON, DontUseVMOVSR]>; 7331 7332//===----------------------------------------------------------------------===// 7333// Non-Instruction Patterns or Endiness - Revert Patterns 7334//===----------------------------------------------------------------------===// 7335 7336// bit_convert 7337// 64 bit conversions 7338let Predicates = [HasNEON] in { 7339def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; 7340def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; 7341 7342def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; 7343def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; 7344 7345def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>; 7346def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>; 7347 7348// 128 bit conversions 7349def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; 7350def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; 7351 7352def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; 7353def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; 7354 7355def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>; 7356def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>; 7357} 7358 7359let Predicates = [IsLE,HasNEON] in { 7360 // 64 bit conversions 7361 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; 7362 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; 7363 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>; 7364 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; 7365 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; 7366 7367 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; 7368 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; 7369 def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>; 7370 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; 7371 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; 7372 7373 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; 7374 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; 7375 def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>; 7376 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; 7377 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; 7378 7379 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; 7380 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; 7381 def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>; 7382 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; 7383 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; 7384 7385 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>; 7386 def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>; 7387 def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>; 7388 def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>; 7389 def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (v4f16 DPR:$src)>; 7390 7391 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; 7392 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; 7393 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; 7394 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; 7395 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; 7396 7397 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; 7398 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; 7399 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; 7400 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; 7401 def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>; 7402 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; 7403 7404 // 128 bit conversions 7405 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; 7406 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; 7407 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>; 7408 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; 7409 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; 7410 7411 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; 7412 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; 7413 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>; 7414 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; 7415 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; 7416 7417 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; 7418 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; 7419 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>; 7420 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; 7421 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; 7422 7423 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; 7424 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; 7425 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>; 7426 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; 7427 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; 7428 7429 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>; 7430 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>; 7431 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>; 7432 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>; 7433 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>; 7434 7435 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; 7436 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; 7437 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; 7438 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; 7439 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; 7440 7441 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; 7442 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; 7443 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; 7444 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; 7445 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>; 7446 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; 7447} 7448 7449let Predicates = [IsBE,HasNEON] in { 7450 // 64 bit conversions 7451 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; 7452 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; 7453 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>; 7454 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; 7455 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; 7456 7457 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; 7458 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; 7459 def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>; 7460 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; 7461 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; 7462 7463 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; 7464 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; 7465 def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>; 7466 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 7467 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; 7468 7469 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; 7470 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; 7471 def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>; 7472 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 7473 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; 7474 7475 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; 7476 def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; 7477 def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; 7478 def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; 7479 def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 7480 7481 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; 7482 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; 7483 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; 7484 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; 7485 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 7486 7487 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; 7488 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; 7489 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; 7490 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; 7491 def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>; 7492 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; 7493 7494 // 128 bit conversions 7495 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 7496 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 7497 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; 7498 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 7499 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 7500 7501 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 7502 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 7503 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; 7504 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 7505 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 7506 7507 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 7508 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 7509 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; 7510 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 7511 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 7512 7513 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 7514 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 7515 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; 7516 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 7517 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 7518 7519 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7520 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7521 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7522 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; 7523 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7524 7525 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 7526 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 7527 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 7528 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; 7529 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 7530 7531 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; 7532 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; 7533 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; 7534 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; 7535 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>; 7536 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; 7537} 7538 7539// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian 7540let Predicates = [IsBE,HasNEON] in { 7541def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 7542 (VREV64q8 (VLD1q8 addrmode6:$addr))>; 7543def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 7544 (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>; 7545def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 7546 (VREV64q16 (VLD1q16 addrmode6:$addr))>; 7547def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 7548 (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>; 7549} 7550 7551// Fold extracting an element out of a v2i32 into a vfp register. 7552def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), 7553 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>, 7554 Requires<[HasNEON]>; 7555 7556// Vector lengthening move with load, matching extending loads. 7557 7558// extload, zextload and sextload for a standard lengthening load. Example: 7559// Lengthen_Single<"8", "i16", "8"> = 7560// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) 7561// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, 7562// (f64 (IMPLICIT_DEF)), (i32 0)))>; 7563multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { 7564 let AddedComplexity = 10 in { 7565 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7566 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)), 7567 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 7568 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>, 7569 Requires<[HasNEON]>; 7570 7571 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7572 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)), 7573 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 7574 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>, 7575 Requires<[HasNEON]>; 7576 7577 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7578 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)), 7579 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) 7580 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>, 7581 Requires<[HasNEON]>; 7582 } 7583} 7584 7585// extload, zextload and sextload for a lengthening load which only uses 7586// half the lanes available. Example: 7587// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = 7588// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), 7589// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, 7590// (f64 (IMPLICIT_DEF)), (i32 0))), 7591// dsub_0)>; 7592multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, 7593 string InsnLanes, string InsnTy> { 7594 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7595 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 7596 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 7597 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7598 dsub_0)>, 7599 Requires<[HasNEON]>; 7600 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7601 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 7602 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 7603 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7604 dsub_0)>, 7605 Requires<[HasNEON]>; 7606 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7607 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 7608 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) 7609 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7610 dsub_0)>, 7611 Requires<[HasNEON]>; 7612} 7613 7614// The following class definition is basically a copy of the 7615// Lengthen_HalfSingle definition above, however with an additional parameter 7616// "RevLanes" to select the correct VREV32dXX instruction. This is to convert 7617// data loaded by VLD1LN into proper vector format in big endian mode. 7618multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy, 7619 string InsnLanes, string InsnTy, string RevLanes> { 7620 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7621 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 7622 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 7623 (!cast<Instruction>("VREV32d" # RevLanes) 7624 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7625 dsub_0)>, 7626 Requires<[HasNEON]>; 7627 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7628 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 7629 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 7630 (!cast<Instruction>("VREV32d" # RevLanes) 7631 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7632 dsub_0)>, 7633 Requires<[HasNEON]>; 7634 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7635 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 7636 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) 7637 (!cast<Instruction>("VREV32d" # RevLanes) 7638 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7639 dsub_0)>, 7640 Requires<[HasNEON]>; 7641} 7642 7643// extload, zextload and sextload for a lengthening load followed by another 7644// lengthening load, to quadruple the initial length. 7645// 7646// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = 7647// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) 7648// (EXTRACT_SUBREG (VMOVLuv4i32 7649// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, 7650// (f64 (IMPLICIT_DEF)), 7651// (i32 0))), 7652// dsub_0)), 7653// dsub_0)>; 7654multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, 7655 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 7656 string Insn2Ty> { 7657 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7658 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 7659 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7660 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7661 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7662 dsub_0))>, 7663 Requires<[HasNEON]>; 7664 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7665 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 7666 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7667 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7668 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7669 dsub_0))>, 7670 Requires<[HasNEON]>; 7671 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7672 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 7673 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 7674 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 7675 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7676 dsub_0))>, 7677 Requires<[HasNEON]>; 7678} 7679 7680// The following class definition is basically a copy of the 7681// Lengthen_Double definition above, however with an additional parameter 7682// "RevLanes" to select the correct VREV32dXX instruction. This is to convert 7683// data loaded by VLD1LN into proper vector format in big endian mode. 7684multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy, 7685 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 7686 string Insn2Ty, string RevLanes> { 7687 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7688 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 7689 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7690 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7691 (!cast<Instruction>("VREV32d" # RevLanes) 7692 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7693 dsub_0))>, 7694 Requires<[HasNEON]>; 7695 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7696 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 7697 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7698 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7699 (!cast<Instruction>("VREV32d" # RevLanes) 7700 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7701 dsub_0))>, 7702 Requires<[HasNEON]>; 7703 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7704 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 7705 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 7706 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 7707 (!cast<Instruction>("VREV32d" # RevLanes) 7708 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7709 dsub_0))>, 7710 Requires<[HasNEON]>; 7711} 7712 7713// extload, zextload and sextload for a lengthening load followed by another 7714// lengthening load, to quadruple the initial length, but which ends up only 7715// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 7716// 7717// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = 7718// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) 7719// (EXTRACT_SUBREG (VMOVLuv4i32 7720// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, 7721// (f64 (IMPLICIT_DEF)), (i32 0))), 7722// dsub_0)), 7723// dsub_0)>; 7724multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, 7725 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 7726 string Insn2Ty> { 7727 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7728 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), 7729 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7730 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7731 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7732 dsub_0)), 7733 dsub_0)>, 7734 Requires<[HasNEON]>; 7735 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7736 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), 7737 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7738 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7739 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7740 dsub_0)), 7741 dsub_0)>, 7742 Requires<[HasNEON]>; 7743 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7744 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), 7745 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 7746 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 7747 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7748 dsub_0)), 7749 dsub_0)>, 7750 Requires<[HasNEON]>; 7751} 7752 7753// The following class definition is basically a copy of the 7754// Lengthen_HalfDouble definition above, however with an additional VREV16d8 7755// instruction to convert data loaded by VLD1LN into proper vector format 7756// in big endian mode. 7757multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy, 7758 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 7759 string Insn2Ty> { 7760 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7761 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), 7762 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7763 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7764 (!cast<Instruction>("VREV16d8") 7765 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7766 dsub_0)), 7767 dsub_0)>, 7768 Requires<[HasNEON]>; 7769 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7770 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), 7771 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7772 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7773 (!cast<Instruction>("VREV16d8") 7774 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7775 dsub_0)), 7776 dsub_0)>, 7777 Requires<[HasNEON]>; 7778 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7779 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), 7780 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 7781 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 7782 (!cast<Instruction>("VREV16d8") 7783 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7784 dsub_0)), 7785 dsub_0)>, 7786 Requires<[HasNEON]>; 7787} 7788 7789defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 7790defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 7791defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 7792 7793let Predicates = [HasNEON,IsLE] in { 7794 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 7795 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 7796 7797 // Double lengthening - v4i8 -> v4i16 -> v4i32 7798 defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; 7799 // v2i8 -> v2i16 -> v2i32 7800 defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; 7801 // v2i16 -> v2i32 -> v2i64 7802 defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; 7803} 7804 7805let Predicates = [HasNEON,IsBE] in { 7806 defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16 7807 defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32 7808 7809 // Double lengthening - v4i8 -> v4i16 -> v4i32 7810 defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">; 7811 // v2i8 -> v2i16 -> v2i32 7812 defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">; 7813 // v2i16 -> v2i32 -> v2i64 7814 defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">; 7815} 7816 7817// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 7818let Predicates = [HasNEON,IsLE] in { 7819 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), 7820 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7821 (VLD1LNd16 addrmode6:$addr, 7822 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 7823 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), 7824 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7825 (VLD1LNd16 addrmode6:$addr, 7826 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 7827 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), 7828 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 7829 (VLD1LNd16 addrmode6:$addr, 7830 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 7831} 7832// The following patterns are basically a copy of the patterns above, 7833// however with an additional VREV16d instruction to convert data 7834// loaded by VLD1LN into proper vector format in big endian mode. 7835let Predicates = [HasNEON,IsBE] in { 7836 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), 7837 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7838 (!cast<Instruction>("VREV16d8") 7839 (VLD1LNd16 addrmode6:$addr, 7840 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 7841 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), 7842 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7843 (!cast<Instruction>("VREV16d8") 7844 (VLD1LNd16 addrmode6:$addr, 7845 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 7846 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), 7847 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 7848 (!cast<Instruction>("VREV16d8") 7849 (VLD1LNd16 addrmode6:$addr, 7850 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 7851} 7852 7853let Predicates = [HasNEON] in { 7854def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)), 7855 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; 7856def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)), 7857 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; 7858def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)), 7859 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; 7860def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)), 7861 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; 7862def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)), 7863 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; 7864def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)), 7865 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; 7866} 7867 7868//===----------------------------------------------------------------------===// 7869// Assembler aliases 7870// 7871 7872def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", 7873 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; 7874def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", 7875 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; 7876 7877// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 7878defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 7879 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7880defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 7881 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7882defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 7883 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7884defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 7885 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7886defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 7887 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7888defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 7889 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7890defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 7891 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7892defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 7893 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7894// ... two-operand aliases 7895defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 7896 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 7897defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 7898 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 7899defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 7900 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 7901defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 7902 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 7903defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 7904 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 7905defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 7906 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 7907// ... immediates 7908def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", 7909 (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; 7910def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", 7911 (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; 7912def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", 7913 (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; 7914def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", 7915 (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; 7916 7917 7918// VLD1 single-lane pseudo-instructions. These need special handling for 7919// the lane index that an InstAlias can't handle, so we use these instead. 7920def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", 7921 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7922 pred:$p)>; 7923def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", 7924 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7925 pred:$p)>; 7926def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", 7927 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7928 pred:$p)>; 7929 7930def VLD1LNdWB_fixed_Asm_8 : 7931 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", 7932 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7933 pred:$p)>; 7934def VLD1LNdWB_fixed_Asm_16 : 7935 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", 7936 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7937 pred:$p)>; 7938def VLD1LNdWB_fixed_Asm_32 : 7939 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", 7940 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7941 pred:$p)>; 7942def VLD1LNdWB_register_Asm_8 : 7943 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", 7944 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7945 rGPR:$Rm, pred:$p)>; 7946def VLD1LNdWB_register_Asm_16 : 7947 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", 7948 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7949 rGPR:$Rm, pred:$p)>; 7950def VLD1LNdWB_register_Asm_32 : 7951 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", 7952 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7953 rGPR:$Rm, pred:$p)>; 7954 7955 7956// VST1 single-lane pseudo-instructions. These need special handling for 7957// the lane index that an InstAlias can't handle, so we use these instead. 7958def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", 7959 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7960 pred:$p)>; 7961def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", 7962 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7963 pred:$p)>; 7964def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", 7965 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7966 pred:$p)>; 7967 7968def VST1LNdWB_fixed_Asm_8 : 7969 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", 7970 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7971 pred:$p)>; 7972def VST1LNdWB_fixed_Asm_16 : 7973 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", 7974 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7975 pred:$p)>; 7976def VST1LNdWB_fixed_Asm_32 : 7977 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", 7978 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7979 pred:$p)>; 7980def VST1LNdWB_register_Asm_8 : 7981 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", 7982 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7983 rGPR:$Rm, pred:$p)>; 7984def VST1LNdWB_register_Asm_16 : 7985 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", 7986 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7987 rGPR:$Rm, pred:$p)>; 7988def VST1LNdWB_register_Asm_32 : 7989 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", 7990 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7991 rGPR:$Rm, pred:$p)>; 7992 7993// VLD2 single-lane pseudo-instructions. These need special handling for 7994// the lane index that an InstAlias can't handle, so we use these instead. 7995def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", 7996 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 7997 pred:$p)>; 7998def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 7999 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8000 pred:$p)>; 8001def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 8002 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; 8003def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 8004 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8005 pred:$p)>; 8006def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 8007 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8008 pred:$p)>; 8009 8010def VLD2LNdWB_fixed_Asm_8 : 8011 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", 8012 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8013 pred:$p)>; 8014def VLD2LNdWB_fixed_Asm_16 : 8015 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 8016 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8017 pred:$p)>; 8018def VLD2LNdWB_fixed_Asm_32 : 8019 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 8020 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8021 pred:$p)>; 8022def VLD2LNqWB_fixed_Asm_16 : 8023 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 8024 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8025 pred:$p)>; 8026def VLD2LNqWB_fixed_Asm_32 : 8027 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 8028 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8029 pred:$p)>; 8030def VLD2LNdWB_register_Asm_8 : 8031 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", 8032 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8033 rGPR:$Rm, pred:$p)>; 8034def VLD2LNdWB_register_Asm_16 : 8035 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 8036 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8037 rGPR:$Rm, pred:$p)>; 8038def VLD2LNdWB_register_Asm_32 : 8039 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 8040 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8041 rGPR:$Rm, pred:$p)>; 8042def VLD2LNqWB_register_Asm_16 : 8043 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 8044 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8045 rGPR:$Rm, pred:$p)>; 8046def VLD2LNqWB_register_Asm_32 : 8047 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 8048 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8049 rGPR:$Rm, pred:$p)>; 8050 8051 8052// VST2 single-lane pseudo-instructions. These need special handling for 8053// the lane index that an InstAlias can't handle, so we use these instead. 8054def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", 8055 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8056 pred:$p)>; 8057def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 8058 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8059 pred:$p)>; 8060def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 8061 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8062 pred:$p)>; 8063def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 8064 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8065 pred:$p)>; 8066def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 8067 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8068 pred:$p)>; 8069 8070def VST2LNdWB_fixed_Asm_8 : 8071 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", 8072 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8073 pred:$p)>; 8074def VST2LNdWB_fixed_Asm_16 : 8075 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 8076 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8077 pred:$p)>; 8078def VST2LNdWB_fixed_Asm_32 : 8079 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 8080 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8081 pred:$p)>; 8082def VST2LNqWB_fixed_Asm_16 : 8083 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 8084 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8085 pred:$p)>; 8086def VST2LNqWB_fixed_Asm_32 : 8087 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 8088 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8089 pred:$p)>; 8090def VST2LNdWB_register_Asm_8 : 8091 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", 8092 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 8093 rGPR:$Rm, pred:$p)>; 8094def VST2LNdWB_register_Asm_16 : 8095 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 8096 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 8097 rGPR:$Rm, pred:$p)>; 8098def VST2LNdWB_register_Asm_32 : 8099 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 8100 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 8101 rGPR:$Rm, pred:$p)>; 8102def VST2LNqWB_register_Asm_16 : 8103 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 8104 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 8105 rGPR:$Rm, pred:$p)>; 8106def VST2LNqWB_register_Asm_32 : 8107 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 8108 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 8109 rGPR:$Rm, pred:$p)>; 8110 8111// VLD3 all-lanes pseudo-instructions. These need special handling for 8112// the lane index that an InstAlias can't handle, so we use these instead. 8113def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 8114 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8115 pred:$p)>; 8116def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 8117 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8118 pred:$p)>; 8119def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 8120 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8121 pred:$p)>; 8122def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 8123 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8124 pred:$p)>; 8125def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 8126 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8127 pred:$p)>; 8128def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 8129 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8130 pred:$p)>; 8131 8132def VLD3DUPdWB_fixed_Asm_8 : 8133 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 8134 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8135 pred:$p)>; 8136def VLD3DUPdWB_fixed_Asm_16 : 8137 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 8138 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8139 pred:$p)>; 8140def VLD3DUPdWB_fixed_Asm_32 : 8141 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 8142 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8143 pred:$p)>; 8144def VLD3DUPqWB_fixed_Asm_8 : 8145 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 8146 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8147 pred:$p)>; 8148def VLD3DUPqWB_fixed_Asm_16 : 8149 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 8150 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8151 pred:$p)>; 8152def VLD3DUPqWB_fixed_Asm_32 : 8153 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 8154 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8155 pred:$p)>; 8156def VLD3DUPdWB_register_Asm_8 : 8157 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 8158 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8159 rGPR:$Rm, pred:$p)>; 8160def VLD3DUPdWB_register_Asm_16 : 8161 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 8162 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8163 rGPR:$Rm, pred:$p)>; 8164def VLD3DUPdWB_register_Asm_32 : 8165 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 8166 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 8167 rGPR:$Rm, pred:$p)>; 8168def VLD3DUPqWB_register_Asm_8 : 8169 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 8170 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8171 rGPR:$Rm, pred:$p)>; 8172def VLD3DUPqWB_register_Asm_16 : 8173 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 8174 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8175 rGPR:$Rm, pred:$p)>; 8176def VLD3DUPqWB_register_Asm_32 : 8177 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 8178 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 8179 rGPR:$Rm, pred:$p)>; 8180 8181 8182// VLD3 single-lane pseudo-instructions. These need special handling for 8183// the lane index that an InstAlias can't handle, so we use these instead. 8184def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 8185 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 8186 pred:$p)>; 8187def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 8188 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 8189 pred:$p)>; 8190def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 8191 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 8192 pred:$p)>; 8193def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 8194 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 8195 pred:$p)>; 8196def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 8197 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 8198 pred:$p)>; 8199 8200def VLD3LNdWB_fixed_Asm_8 : 8201 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 8202 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 8203 pred:$p)>; 8204def VLD3LNdWB_fixed_Asm_16 : 8205 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 8206 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 8207 pred:$p)>; 8208def VLD3LNdWB_fixed_Asm_32 : 8209 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 8210 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 8211 pred:$p)>; 8212def VLD3LNqWB_fixed_Asm_16 : 8213 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 8214 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 8215 pred:$p)>; 8216def VLD3LNqWB_fixed_Asm_32 : 8217 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 8218 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 8219 pred:$p)>; 8220def VLD3LNdWB_register_Asm_8 : 8221 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 8222 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 8223 rGPR:$Rm, pred:$p)>; 8224def VLD3LNdWB_register_Asm_16 : 8225 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 8226 (ins VecListThreeDHWordIndexed:$list, 8227 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 8228def VLD3LNdWB_register_Asm_32 : 8229 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 8230 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 8231 rGPR:$Rm, pred:$p)>; 8232def VLD3LNqWB_register_Asm_16 : 8233 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 8234 (ins VecListThreeQHWordIndexed:$list, 8235 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 8236def VLD3LNqWB_register_Asm_32 : 8237 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 8238 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 8239 rGPR:$Rm, pred:$p)>; 8240 8241// VLD3 multiple structure pseudo-instructions. These need special handling for 8242// the vector operands that the normal instructions don't yet model. 8243// FIXME: Remove these when the register classes and instructions are updated. 8244def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 8245 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8246def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 8247 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8248def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 8249 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8250def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 8251 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8252def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 8253 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8254def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 8255 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8256 8257def VLD3dWB_fixed_Asm_8 : 8258 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 8259 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8260def VLD3dWB_fixed_Asm_16 : 8261 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 8262 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8263def VLD3dWB_fixed_Asm_32 : 8264 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 8265 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8266def VLD3qWB_fixed_Asm_8 : 8267 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 8268 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8269def VLD3qWB_fixed_Asm_16 : 8270 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 8271 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8272def VLD3qWB_fixed_Asm_32 : 8273 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 8274 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8275def VLD3dWB_register_Asm_8 : 8276 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 8277 (ins VecListThreeD:$list, addrmode6align64:$addr, 8278 rGPR:$Rm, pred:$p)>; 8279def VLD3dWB_register_Asm_16 : 8280 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 8281 (ins VecListThreeD:$list, addrmode6align64:$addr, 8282 rGPR:$Rm, pred:$p)>; 8283def VLD3dWB_register_Asm_32 : 8284 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 8285 (ins VecListThreeD:$list, addrmode6align64:$addr, 8286 rGPR:$Rm, pred:$p)>; 8287def VLD3qWB_register_Asm_8 : 8288 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 8289 (ins VecListThreeQ:$list, addrmode6align64:$addr, 8290 rGPR:$Rm, pred:$p)>; 8291def VLD3qWB_register_Asm_16 : 8292 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 8293 (ins VecListThreeQ:$list, addrmode6align64:$addr, 8294 rGPR:$Rm, pred:$p)>; 8295def VLD3qWB_register_Asm_32 : 8296 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 8297 (ins VecListThreeQ:$list, addrmode6align64:$addr, 8298 rGPR:$Rm, pred:$p)>; 8299 8300// VST3 single-lane pseudo-instructions. These need special handling for 8301// the lane index that an InstAlias can't handle, so we use these instead. 8302def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 8303 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 8304 pred:$p)>; 8305def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 8306 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 8307 pred:$p)>; 8308def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 8309 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 8310 pred:$p)>; 8311def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 8312 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 8313 pred:$p)>; 8314def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 8315 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 8316 pred:$p)>; 8317 8318def VST3LNdWB_fixed_Asm_8 : 8319 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 8320 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 8321 pred:$p)>; 8322def VST3LNdWB_fixed_Asm_16 : 8323 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 8324 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 8325 pred:$p)>; 8326def VST3LNdWB_fixed_Asm_32 : 8327 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 8328 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 8329 pred:$p)>; 8330def VST3LNqWB_fixed_Asm_16 : 8331 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 8332 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 8333 pred:$p)>; 8334def VST3LNqWB_fixed_Asm_32 : 8335 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 8336 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 8337 pred:$p)>; 8338def VST3LNdWB_register_Asm_8 : 8339 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 8340 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 8341 rGPR:$Rm, pred:$p)>; 8342def VST3LNdWB_register_Asm_16 : 8343 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 8344 (ins VecListThreeDHWordIndexed:$list, 8345 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 8346def VST3LNdWB_register_Asm_32 : 8347 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 8348 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 8349 rGPR:$Rm, pred:$p)>; 8350def VST3LNqWB_register_Asm_16 : 8351 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 8352 (ins VecListThreeQHWordIndexed:$list, 8353 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 8354def VST3LNqWB_register_Asm_32 : 8355 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 8356 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 8357 rGPR:$Rm, pred:$p)>; 8358 8359 8360// VST3 multiple structure pseudo-instructions. These need special handling for 8361// the vector operands that the normal instructions don't yet model. 8362// FIXME: Remove these when the register classes and instructions are updated. 8363def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 8364 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8365def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 8366 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8367def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 8368 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8369def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 8370 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8371def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 8372 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8373def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 8374 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8375 8376def VST3dWB_fixed_Asm_8 : 8377 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 8378 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8379def VST3dWB_fixed_Asm_16 : 8380 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 8381 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8382def VST3dWB_fixed_Asm_32 : 8383 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 8384 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 8385def VST3qWB_fixed_Asm_8 : 8386 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 8387 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8388def VST3qWB_fixed_Asm_16 : 8389 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 8390 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8391def VST3qWB_fixed_Asm_32 : 8392 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 8393 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 8394def VST3dWB_register_Asm_8 : 8395 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 8396 (ins VecListThreeD:$list, addrmode6align64:$addr, 8397 rGPR:$Rm, pred:$p)>; 8398def VST3dWB_register_Asm_16 : 8399 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 8400 (ins VecListThreeD:$list, addrmode6align64:$addr, 8401 rGPR:$Rm, pred:$p)>; 8402def VST3dWB_register_Asm_32 : 8403 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 8404 (ins VecListThreeD:$list, addrmode6align64:$addr, 8405 rGPR:$Rm, pred:$p)>; 8406def VST3qWB_register_Asm_8 : 8407 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 8408 (ins VecListThreeQ:$list, addrmode6align64:$addr, 8409 rGPR:$Rm, pred:$p)>; 8410def VST3qWB_register_Asm_16 : 8411 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 8412 (ins VecListThreeQ:$list, addrmode6align64:$addr, 8413 rGPR:$Rm, pred:$p)>; 8414def VST3qWB_register_Asm_32 : 8415 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 8416 (ins VecListThreeQ:$list, addrmode6align64:$addr, 8417 rGPR:$Rm, pred:$p)>; 8418 8419// VLD4 all-lanes pseudo-instructions. These need special handling for 8420// the lane index that an InstAlias can't handle, so we use these instead. 8421def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 8422 (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, 8423 pred:$p)>; 8424def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 8425 (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, 8426 pred:$p)>; 8427def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 8428 (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, 8429 pred:$p)>; 8430def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 8431 (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, 8432 pred:$p)>; 8433def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 8434 (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, 8435 pred:$p)>; 8436def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 8437 (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, 8438 pred:$p)>; 8439 8440def VLD4DUPdWB_fixed_Asm_8 : 8441 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 8442 (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, 8443 pred:$p)>; 8444def VLD4DUPdWB_fixed_Asm_16 : 8445 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 8446 (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, 8447 pred:$p)>; 8448def VLD4DUPdWB_fixed_Asm_32 : 8449 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 8450 (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, 8451 pred:$p)>; 8452def VLD4DUPqWB_fixed_Asm_8 : 8453 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 8454 (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, 8455 pred:$p)>; 8456def VLD4DUPqWB_fixed_Asm_16 : 8457 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 8458 (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, 8459 pred:$p)>; 8460def VLD4DUPqWB_fixed_Asm_32 : 8461 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 8462 (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, 8463 pred:$p)>; 8464def VLD4DUPdWB_register_Asm_8 : 8465 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 8466 (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, 8467 rGPR:$Rm, pred:$p)>; 8468def VLD4DUPdWB_register_Asm_16 : 8469 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 8470 (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, 8471 rGPR:$Rm, pred:$p)>; 8472def VLD4DUPdWB_register_Asm_32 : 8473 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 8474 (ins VecListFourDAllLanes:$list, 8475 addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; 8476def VLD4DUPqWB_register_Asm_8 : 8477 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 8478 (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, 8479 rGPR:$Rm, pred:$p)>; 8480def VLD4DUPqWB_register_Asm_16 : 8481 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 8482 (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, 8483 rGPR:$Rm, pred:$p)>; 8484def VLD4DUPqWB_register_Asm_32 : 8485 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 8486 (ins VecListFourQAllLanes:$list, 8487 addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; 8488 8489 8490// VLD4 single-lane pseudo-instructions. These need special handling for 8491// the lane index that an InstAlias can't handle, so we use these instead. 8492def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 8493 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 8494 pred:$p)>; 8495def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 8496 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 8497 pred:$p)>; 8498def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 8499 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 8500 pred:$p)>; 8501def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 8502 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 8503 pred:$p)>; 8504def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 8505 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 8506 pred:$p)>; 8507 8508def VLD4LNdWB_fixed_Asm_8 : 8509 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 8510 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 8511 pred:$p)>; 8512def VLD4LNdWB_fixed_Asm_16 : 8513 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 8514 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 8515 pred:$p)>; 8516def VLD4LNdWB_fixed_Asm_32 : 8517 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 8518 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 8519 pred:$p)>; 8520def VLD4LNqWB_fixed_Asm_16 : 8521 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 8522 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 8523 pred:$p)>; 8524def VLD4LNqWB_fixed_Asm_32 : 8525 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 8526 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 8527 pred:$p)>; 8528def VLD4LNdWB_register_Asm_8 : 8529 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 8530 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 8531 rGPR:$Rm, pred:$p)>; 8532def VLD4LNdWB_register_Asm_16 : 8533 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 8534 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 8535 rGPR:$Rm, pred:$p)>; 8536def VLD4LNdWB_register_Asm_32 : 8537 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 8538 (ins VecListFourDWordIndexed:$list, 8539 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 8540def VLD4LNqWB_register_Asm_16 : 8541 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 8542 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 8543 rGPR:$Rm, pred:$p)>; 8544def VLD4LNqWB_register_Asm_32 : 8545 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 8546 (ins VecListFourQWordIndexed:$list, 8547 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 8548 8549 8550 8551// VLD4 multiple structure pseudo-instructions. These need special handling for 8552// the vector operands that the normal instructions don't yet model. 8553// FIXME: Remove these when the register classes and instructions are updated. 8554def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 8555 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8556 pred:$p)>; 8557def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 8558 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8559 pred:$p)>; 8560def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 8561 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8562 pred:$p)>; 8563def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 8564 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8565 pred:$p)>; 8566def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 8567 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8568 pred:$p)>; 8569def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 8570 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8571 pred:$p)>; 8572 8573def VLD4dWB_fixed_Asm_8 : 8574 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 8575 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8576 pred:$p)>; 8577def VLD4dWB_fixed_Asm_16 : 8578 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 8579 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8580 pred:$p)>; 8581def VLD4dWB_fixed_Asm_32 : 8582 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 8583 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8584 pred:$p)>; 8585def VLD4qWB_fixed_Asm_8 : 8586 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 8587 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8588 pred:$p)>; 8589def VLD4qWB_fixed_Asm_16 : 8590 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 8591 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8592 pred:$p)>; 8593def VLD4qWB_fixed_Asm_32 : 8594 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 8595 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8596 pred:$p)>; 8597def VLD4dWB_register_Asm_8 : 8598 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 8599 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8600 rGPR:$Rm, pred:$p)>; 8601def VLD4dWB_register_Asm_16 : 8602 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 8603 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8604 rGPR:$Rm, pred:$p)>; 8605def VLD4dWB_register_Asm_32 : 8606 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 8607 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8608 rGPR:$Rm, pred:$p)>; 8609def VLD4qWB_register_Asm_8 : 8610 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 8611 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8612 rGPR:$Rm, pred:$p)>; 8613def VLD4qWB_register_Asm_16 : 8614 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 8615 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8616 rGPR:$Rm, pred:$p)>; 8617def VLD4qWB_register_Asm_32 : 8618 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 8619 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8620 rGPR:$Rm, pred:$p)>; 8621 8622// VST4 single-lane pseudo-instructions. These need special handling for 8623// the lane index that an InstAlias can't handle, so we use these instead. 8624def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 8625 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 8626 pred:$p)>; 8627def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 8628 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 8629 pred:$p)>; 8630def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 8631 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 8632 pred:$p)>; 8633def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 8634 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 8635 pred:$p)>; 8636def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 8637 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 8638 pred:$p)>; 8639 8640def VST4LNdWB_fixed_Asm_8 : 8641 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 8642 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 8643 pred:$p)>; 8644def VST4LNdWB_fixed_Asm_16 : 8645 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 8646 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 8647 pred:$p)>; 8648def VST4LNdWB_fixed_Asm_32 : 8649 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 8650 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 8651 pred:$p)>; 8652def VST4LNqWB_fixed_Asm_16 : 8653 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 8654 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 8655 pred:$p)>; 8656def VST4LNqWB_fixed_Asm_32 : 8657 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 8658 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 8659 pred:$p)>; 8660def VST4LNdWB_register_Asm_8 : 8661 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 8662 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 8663 rGPR:$Rm, pred:$p)>; 8664def VST4LNdWB_register_Asm_16 : 8665 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 8666 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 8667 rGPR:$Rm, pred:$p)>; 8668def VST4LNdWB_register_Asm_32 : 8669 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 8670 (ins VecListFourDWordIndexed:$list, 8671 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 8672def VST4LNqWB_register_Asm_16 : 8673 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 8674 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 8675 rGPR:$Rm, pred:$p)>; 8676def VST4LNqWB_register_Asm_32 : 8677 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 8678 (ins VecListFourQWordIndexed:$list, 8679 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 8680 8681 8682// VST4 multiple structure pseudo-instructions. These need special handling for 8683// the vector operands that the normal instructions don't yet model. 8684// FIXME: Remove these when the register classes and instructions are updated. 8685def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 8686 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8687 pred:$p)>; 8688def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 8689 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8690 pred:$p)>; 8691def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 8692 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8693 pred:$p)>; 8694def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 8695 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8696 pred:$p)>; 8697def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 8698 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8699 pred:$p)>; 8700def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 8701 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8702 pred:$p)>; 8703 8704def VST4dWB_fixed_Asm_8 : 8705 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 8706 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8707 pred:$p)>; 8708def VST4dWB_fixed_Asm_16 : 8709 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 8710 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8711 pred:$p)>; 8712def VST4dWB_fixed_Asm_32 : 8713 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 8714 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8715 pred:$p)>; 8716def VST4qWB_fixed_Asm_8 : 8717 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 8718 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8719 pred:$p)>; 8720def VST4qWB_fixed_Asm_16 : 8721 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 8722 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8723 pred:$p)>; 8724def VST4qWB_fixed_Asm_32 : 8725 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 8726 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8727 pred:$p)>; 8728def VST4dWB_register_Asm_8 : 8729 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 8730 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8731 rGPR:$Rm, pred:$p)>; 8732def VST4dWB_register_Asm_16 : 8733 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 8734 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8735 rGPR:$Rm, pred:$p)>; 8736def VST4dWB_register_Asm_32 : 8737 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 8738 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8739 rGPR:$Rm, pred:$p)>; 8740def VST4qWB_register_Asm_8 : 8741 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 8742 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8743 rGPR:$Rm, pred:$p)>; 8744def VST4qWB_register_Asm_16 : 8745 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 8746 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8747 rGPR:$Rm, pred:$p)>; 8748def VST4qWB_register_Asm_32 : 8749 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 8750 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8751 rGPR:$Rm, pred:$p)>; 8752 8753// VMOV/VMVN takes an optional datatype suffix 8754defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 8755 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 8756defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 8757 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 8758 8759defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", 8760 (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; 8761defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", 8762 (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; 8763 8764// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 8765// D-register versions. 8766def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", 8767 (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8768def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", 8769 (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8770def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", 8771 (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8772def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", 8773 (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8774def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", 8775 (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8776def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", 8777 (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8778def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", 8779 (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8780let Predicates = [HasNEON, HasFullFP16] in 8781def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm", 8782 (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8783// Q-register versions. 8784def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", 8785 (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8786def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", 8787 (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8788def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", 8789 (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8790def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", 8791 (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8792def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", 8793 (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8794def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", 8795 (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8796def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", 8797 (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8798let Predicates = [HasNEON, HasFullFP16] in 8799def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm", 8800 (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8801 8802// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 8803// D-register versions. 8804def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", 8805 (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8806def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", 8807 (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8808def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", 8809 (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8810def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", 8811 (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8812def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", 8813 (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8814def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", 8815 (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8816def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", 8817 (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8818let Predicates = [HasNEON, HasFullFP16] in 8819def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm", 8820 (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8821// Q-register versions. 8822def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", 8823 (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8824def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", 8825 (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8826def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", 8827 (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8828def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", 8829 (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8830def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", 8831 (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8832def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", 8833 (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8834def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", 8835 (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8836let Predicates = [HasNEON, HasFullFP16] in 8837def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm", 8838 (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8839 8840// VSWP allows, but does not require, a type suffix. 8841defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 8842 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; 8843defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 8844 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; 8845 8846// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. 8847defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 8848 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 8849defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 8850 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 8851defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 8852 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 8853defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 8854 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 8855defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 8856 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 8857defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 8858 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 8859 8860// "vmov Rd, #-imm" can be handled via "vmvn". 8861def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 8862 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8863def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 8864 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8865def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 8866 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8867def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 8868 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8869 8870// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, 8871// these should restrict to just the Q register variants, but the register 8872// classes are enough to match correctly regardless, so we keep it simple 8873// and just use MnemonicAlias. 8874def : NEONMnemonicAlias<"vbicq", "vbic">; 8875def : NEONMnemonicAlias<"vandq", "vand">; 8876def : NEONMnemonicAlias<"veorq", "veor">; 8877def : NEONMnemonicAlias<"vorrq", "vorr">; 8878 8879def : NEONMnemonicAlias<"vmovq", "vmov">; 8880def : NEONMnemonicAlias<"vmvnq", "vmvn">; 8881// Explicit versions for floating point so that the FPImm variants get 8882// handled early. The parser gets confused otherwise. 8883def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; 8884def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; 8885 8886def : NEONMnemonicAlias<"vaddq", "vadd">; 8887def : NEONMnemonicAlias<"vsubq", "vsub">; 8888 8889def : NEONMnemonicAlias<"vminq", "vmin">; 8890def : NEONMnemonicAlias<"vmaxq", "vmax">; 8891 8892def : NEONMnemonicAlias<"vmulq", "vmul">; 8893 8894def : NEONMnemonicAlias<"vabsq", "vabs">; 8895 8896def : NEONMnemonicAlias<"vshlq", "vshl">; 8897def : NEONMnemonicAlias<"vshrq", "vshr">; 8898 8899def : NEONMnemonicAlias<"vcvtq", "vcvt">; 8900 8901def : NEONMnemonicAlias<"vcleq", "vcle">; 8902def : NEONMnemonicAlias<"vceqq", "vceq">; 8903 8904def : NEONMnemonicAlias<"vzipq", "vzip">; 8905def : NEONMnemonicAlias<"vswpq", "vswp">; 8906 8907def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; 8908def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; 8909 8910 8911// Alias for loading floating point immediates that aren't representable 8912// using the vmov.f32 encoding but the bitpattern is representable using 8913// the .i32 encoding. 8914def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 8915 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 8916def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 8917 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 8918