//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Modified-immediate operand; this def sets only a PrintMethod.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat immediates. Each AsmOperandClass below supplies only a Name, and the
// Operand<i32> that follows it points its ParserMatchClass at that class.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// The two "Not" splat operands set a ParserMatchClass but no PrintMethod.
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Parameterised operand classes: Name, PredicateMethod and RenderMethod are
// assembled by string concatenation from the bit sizes of the From and To
// value types.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

// Operands that instantiate the parameterised classes above as their
// ParserMatchClass.
class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Unlike its neighbours, this operand uses printFPImmOperand and the
// FPImmOperand match class (not defined in this part of the file).
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane indices. The ImmLeaf predicate accepts Imm when, cast to
// uint64_t, it is below the bound in the code fragment: 8, 4, 2 and 1 for the
// 8-, 16-, 32- and 64-bit variants respectively.
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register-list operands. Every *AsmOperand class in this group names
// parseVectorList as its ParserMethod and addVecListOperands as its
// RenderMethod; the RegisterOperand that follows supplies the register class
// and the print method.

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
// NOTE(review): this operand is built on DPair, whereas
// VecListDPairSpacedAllLanes is built on DPairSpc -- confirm the difference
// is intentional.
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
// This is the only list operand in this group built on DPairSpc.
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                         "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed lists. These switch to addVecListIndexedOperands as the
// RenderMethod, and the operand is a plain Operand<i32> whose MIOperandInfo
// holds a DPR register ($Vd) followed by an i32 immediate ($idx).

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// The two-register indexed lists below still declare a single DPR:$Vd plus
// the lane index in MIOperandInfo.

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// The three-register indexed lists follow the same shape: parseVectorList,
// addVecListIndexedOperands, and (DPR:$Vd, i32imm:$idx) as MIOperandInfo.

// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// The four-register indexed lists use the same parser/render hooks and the
// same (DPR:$Vd, i32imm:$idx) operand info as the smaller lists.

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Load/store pattern fragments filtered on the memory node's getAlignment():
//   dword_aligned*    : alignment >= 8
//   word_aligned*     : alignment == 4
//   hword_aligned*    : alignment == 2
//   byte_aligned*     : alignment == 1
//   non_word_aligned* : alignment <  4
// Only the dword fragments use a lower bound; word, hword and byte require an
// exact match, so an 8-byte-aligned access matches dword_aligned* but not
// word_aligned*.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profiles for vector compares. SDTARMVCMP: one result, two operands;
// the result is an integer type and the two operands share a type.
// SDTARMVCMPZ: one result, one operand, no type constraints.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

// Nodes whose name ends in "z" use the one-operand profile; the rest use the
// two-operand profile.
def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// SDTARMVSHIMM, used by several nodes below, is not defined in this part of
// the file; presumably it is declared elsewhere -- TODO confirm.
def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

// OR / BIC with immediate: vector result, first operand of the same type,
// second operand an i32.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// VBSL: one vector result and three operands, all of the same type.
def NEONvbsl : SDNode<"ARMISD::VBSL",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

// VEXT: two same-typed vector operands plus an i32 third operand.
def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Two-result shuffles: both results and both operands share one vector type.
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// VMULL: integer result; the two integer operands share a type.
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                       SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// Table lookups: every operand and the result are v8i8.
def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


// Matches an ARMvmovImm whose immediate operand decodes (via
// ARM_AM::decodeNEONModImm) to 32-bit elements with value 0.
def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches an ARMvmovImm whose immediate operand decodes to 8-bit elements
// with value 0xff.
def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// The selection pattern only matches loads that satisfy word_alignedload.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Naming used below: the *WB* variants add a GPR:$wb result tied to
// $addr.addr; "fixed" takes no offset operand, "register" takes an
// rGPR:$offset, and the plain WB form takes an am6offset:$offset.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// The QQQQ forms also take the destination as an input ($src) tied to $dst.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
// The non-writeback forms fix Rm to 0b1111 and copy the low bit(s) of the Rn
// operand into the instruction's low alignment bit(s).
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// _fixed pins Rm to 0b1101 and prints "$Rn!"; _register takes Rm as an
// rGPR operand.
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): both writeback forms below use the IIC_VLD1x2u itinerary,
// while the non-writeback VLD1D3 class uses IIC_VLD1x3 -- confirm this is
// intended.
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): both writeback forms below use the IIC_VLD1x2u itinerary,
// while the non-writeback VLD1D4 class uses IIC_VLD1x4 -- confirm this is
// intended.
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// VLD2 : Vector Load (multiple 2-element structures)
// Unlike the VLD1 classes, this class attaches no Sched<> itself; each def
// below supplies its own.
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                  addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;

def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                  addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
                       addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
                       addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>,
Sched<[WriteVLD4]>; 877def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 878def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 879def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 880def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 881def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 882 883// ...with double-spaced registers 884def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 885 addrmode6align64or128>, Sched<[WriteVLD2]>; 886def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 887 addrmode6align64or128>, Sched<[WriteVLD2]>; 888def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 889 addrmode6align64or128>, Sched<[WriteVLD2]>; 890defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 891 addrmode6align64or128>, Sched<[WriteVLD2]>; 892defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 893 addrmode6align64or128>, Sched<[WriteVLD2]>; 894defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 895 addrmode6align64or128>, Sched<[WriteVLD2]>; 896 897// VLD3 : Vector Load (multiple 3-element structures) 898class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 899 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 900 (ins addrmode6:$Rn), IIC_VLD3, 901 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { 902 let Rm = 0b1111; 903 let Inst{4} = Rn{4}; 904 let DecoderMethod = "DecodeVLDST3Instruction"; 905} 906 907def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 908def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 909def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 910 911def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 912def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 913def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, 
Sched<[WriteVLD3]>; 914 915// ...with address register writeback: 916class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 917 : NLdSt<0, 0b10, op11_8, op7_4, 918 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 919 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 920 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 921 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 922 let Inst{4} = Rn{4}; 923 let DecoderMethod = "DecodeVLDST3Instruction"; 924} 925 926def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 927def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 928def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 929 930def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 931def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 932def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 933 934// ...with double-spaced registers: 935def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 936def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 937def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 938def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 939def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 940def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 941 942def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 943def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 944def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 945 946// ...alternate versions to be allocated odd register numbers: 947def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 948def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 949def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 950 951def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 952def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 953def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 954 955// VLD4 : Vector Load (multiple 4-element structures) 956class 
VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 957 : NLdSt<0, 0b10, op11_8, op7_4, 958 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 959 (ins addrmode6:$Rn), IIC_VLD4, 960 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, 961 Sched<[WriteVLD4]> { 962 let Rm = 0b1111; 963 let Inst{5-4} = Rn{5-4}; 964 let DecoderMethod = "DecodeVLDST4Instruction"; 965} 966 967def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 968def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 969def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 970 971def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 972def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 973def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 974 975// ...with address register writeback: 976class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 977 : NLdSt<0, 0b10, op11_8, op7_4, 978 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 979 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 980 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 981 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 982 let Inst{5-4} = Rn{5-4}; 983 let DecoderMethod = "DecodeVLDST4Instruction"; 984} 985 986def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 987def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 988def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 989 990def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 991def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 992def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 993 994// ...with double-spaced registers: 995def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 996def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 997def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 998def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 999def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 1000def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 1001 1002def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1003def 
VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1004def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1005 1006// ...alternate versions to be allocated odd register numbers: 1007def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1008def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1009def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1010 1011def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1012def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1013def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1014 1015} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1016 1017// Classes for VLD*LN pseudo-instructions with multi-register operands. 1018// These are expanded to real instructions after register allocation. 1019class VLDQLNPseudo<InstrItinClass itin> 1020 : PseudoNLdSt<(outs QPR:$dst), 1021 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1022 itin, "$src = $dst">; 1023class VLDQLNWBPseudo<InstrItinClass itin> 1024 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 1025 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1026 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1027class VLDQQLNPseudo<InstrItinClass itin> 1028 : PseudoNLdSt<(outs QQPR:$dst), 1029 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1030 itin, "$src = $dst">; 1031class VLDQQLNWBPseudo<InstrItinClass itin> 1032 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 1033 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1034 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1035class VLDQQQQLNPseudo<InstrItinClass itin> 1036 : PseudoNLdSt<(outs QQQQPR:$dst), 1037 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1038 itin, "$src = $dst">; 1039class VLDQQQQLNWBPseudo<InstrItinClass itin> 1040 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 1041 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1042 
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1043 1044// VLD1LN : Vector Load (single element to one lane) 1045class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1046 PatFrag LoadOp> 1047 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1048 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 1049 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1050 "$src = $Vd", 1051 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1052 (i32 (LoadOp addrmode6:$Rn)), 1053 imm:$lane))]> { 1054 let Rm = 0b1111; 1055 let DecoderMethod = "DecodeVLD1LN"; 1056} 1057class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1058 PatFrag LoadOp> 1059 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1060 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 1061 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1062 "$src = $Vd", 1063 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1064 (i32 (LoadOp addrmode6oneL32:$Rn)), 1065 imm:$lane))]>, Sched<[WriteVLD1]> { 1066 let Rm = 0b1111; 1067 let DecoderMethod = "DecodeVLD1LN"; 1068} 1069class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>, 1070 Sched<[WriteVLD1]> { 1071 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 1072 (i32 (LoadOp addrmode6:$addr)), 1073 imm:$lane))]; 1074} 1075 1076def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 1077 let Inst{7-5} = lane{2-0}; 1078} 1079def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 1080 let Inst{7-6} = lane{1-0}; 1081 let Inst{5-4} = Rn{5-4}; 1082} 1083def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 1084 let Inst{7} = lane{0}; 1085 let Inst{5-4} = Rn{5-4}; 1086} 1087 1088def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 1089def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 1090def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 1091 1092let Predicates = [HasNEON] in { 1093def : Pat<(vector_insert (v4f16 DPR:$src), 1094 (f16 (load addrmode6:$addr)), imm:$lane), 1095 
(VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>; 1096def : Pat<(vector_insert (v8f16 QPR:$src), 1097 (f16 (load addrmode6:$addr)), imm:$lane), 1098 (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1099def : Pat<(vector_insert (v2f32 DPR:$src), 1100 (f32 (load addrmode6:$addr)), imm:$lane), 1101 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1102def : Pat<(vector_insert (v4f32 QPR:$src), 1103 (f32 (load addrmode6:$addr)), imm:$lane), 1104 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1105 1106// A 64-bit subvector insert to the first 128-bit vector position 1107// is a subregister copy that needs no instruction. 1108def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)), 1109 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1110def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)), 1111 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1112def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)), 1113 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1114def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)), 1115 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1116def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)), 1117 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1118def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)), 1119 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1120} 1121 1122 1123let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1124 1125// ...with address register writeback: 1126class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1127 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1128 (ins addrmode6:$Rn, am6offset:$Rm, 1129 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1130 "\\{$Vd[$lane]\\}, $Rn$Rm", 1131 "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1132 let DecoderMethod = "DecodeVLD1LN"; 1133} 1134 1135def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> 
{ 1136 let Inst{7-5} = lane{2-0}; 1137} 1138def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1139 let Inst{7-6} = lane{1-0}; 1140 let Inst{4} = Rn{4}; 1141} 1142def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1143 let Inst{7} = lane{0}; 1144 let Inst{5} = Rn{4}; 1145 let Inst{4} = Rn{4}; 1146} 1147 1148def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1149def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1150def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1151 1152// VLD2LN : Vector Load (single 2-element structure to one lane) 1153class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1154 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1155 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1156 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1157 "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { 1158 let Rm = 0b1111; 1159 let Inst{4} = Rn{4}; 1160 let DecoderMethod = "DecodeVLD2LN"; 1161} 1162 1163def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1164 let Inst{7-5} = lane{2-0}; 1165} 1166def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1167 let Inst{7-6} = lane{1-0}; 1168} 1169def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1170 let Inst{7} = lane{0}; 1171} 1172 1173def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1174def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1175def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1176 1177// ...with double-spaced registers: 1178def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 1179 let Inst{7-6} = lane{1-0}; 1180} 1181def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 1182 let Inst{7} = lane{0}; 1183} 1184 1185def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1186def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1187 1188// ...with address register writeback: 1189class VLD2LNWB<bits<4> op11_8, 
bits<4> op7_4, string Dt> 1190 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 1191 (ins addrmode6:$Rn, am6offset:$Rm, 1192 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 1193 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 1194 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 1195 let Inst{4} = Rn{4}; 1196 let DecoderMethod = "DecodeVLD2LN"; 1197} 1198 1199def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { 1200 let Inst{7-5} = lane{2-0}; 1201} 1202def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 1203 let Inst{7-6} = lane{1-0}; 1204} 1205def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { 1206 let Inst{7} = lane{0}; 1207} 1208 1209def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1210def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1211def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1212 1213def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 1214 let Inst{7-6} = lane{1-0}; 1215} 1216def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 1217 let Inst{7} = lane{0}; 1218} 1219 1220def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1221def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1222 1223// VLD3LN : Vector Load (single 3-element structure to one lane) 1224class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1225 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1226 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 1227 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 1228 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 1229 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { 1230 let Rm = 0b1111; 1231 let DecoderMethod = "DecodeVLD3LN"; 1232} 1233 1234def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 1235 let Inst{7-5} = lane{2-0}; 1236} 1237def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 1238 let Inst{7-6} = lane{1-0}; 1239} 1240def 
VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 1241 let Inst{7} = lane{0}; 1242} 1243 1244def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1245def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1246def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1247 1248// ...with double-spaced registers: 1249def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 1250 let Inst{7-6} = lane{1-0}; 1251} 1252def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 1253 let Inst{7} = lane{0}; 1254} 1255 1256def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1257def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1258 1259// ...with address register writeback: 1260class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1261 : NLdStLn<1, 0b10, op11_8, op7_4, 1262 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1263 (ins addrmode6:$Rn, am6offset:$Rm, 1264 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 1265 IIC_VLD3lnu, "vld3", Dt, 1266 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 1267 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 1268 []>, Sched<[WriteVLD2]> { 1269 let DecoderMethod = "DecodeVLD3LN"; 1270} 1271 1272def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 1273 let Inst{7-5} = lane{2-0}; 1274} 1275def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 1276 let Inst{7-6} = lane{1-0}; 1277} 1278def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 1279 let Inst{7} = lane{0}; 1280} 1281 1282def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1283def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1284def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1285 1286def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 1287 let Inst{7-6} = lane{1-0}; 1288} 1289def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 1290 let Inst{7} = lane{0}; 1291} 1292 1293def VLD3LNq16Pseudo_UPD : 
VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1294def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1295 1296// VLD4LN : Vector Load (single 4-element structure to one lane) 1297class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1298 : NLdStLn<1, 0b10, op11_8, op7_4, 1299 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1300 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 1301 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 1302 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 1303 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, 1304 Sched<[WriteVLD2]> { 1305 let Rm = 0b1111; 1306 let Inst{4} = Rn{4}; 1307 let DecoderMethod = "DecodeVLD4LN"; 1308} 1309 1310def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 1311 let Inst{7-5} = lane{2-0}; 1312} 1313def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 1314 let Inst{7-6} = lane{1-0}; 1315} 1316def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 1317 let Inst{7} = lane{0}; 1318 let Inst{5} = Rn{5}; 1319} 1320 1321def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1322def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1323def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1324 1325// ...with double-spaced registers: 1326def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { 1327 let Inst{7-6} = lane{1-0}; 1328} 1329def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { 1330 let Inst{7} = lane{0}; 1331 let Inst{5} = Rn{5}; 1332} 1333 1334def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1335def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1336 1337// ...with address register writeback: 1338class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1339 : NLdStLn<1, 0b10, op11_8, op7_4, 1340 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1341 (ins addrmode6:$Rn, am6offset:$Rm, 1342 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 1343 
IIC_VLD4lnu, "vld4", Dt, 1344"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", 1345"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", 1346 []> { 1347 let Inst{4} = Rn{4}; 1348 let DecoderMethod = "DecodeVLD4LN" ; 1349} 1350 1351def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { 1352 let Inst{7-5} = lane{2-0}; 1353} 1354def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { 1355 let Inst{7-6} = lane{1-0}; 1356} 1357def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { 1358 let Inst{7} = lane{0}; 1359 let Inst{5} = Rn{5}; 1360} 1361 1362def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1363def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1364def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1365 1366def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { 1367 let Inst{7-6} = lane{1-0}; 1368} 1369def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { 1370 let Inst{7} = lane{0}; 1371 let Inst{5} = Rn{5}; 1372} 1373 1374def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1375def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1376 1377} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1378 1379// VLD1DUP : Vector Load (single element to all lanes) 1380class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1381 Operand AddrMode> 1382 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), 1383 (ins AddrMode:$Rn), 1384 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", 1385 [(set VecListOneDAllLanes:$Vd, 1386 (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>, 1387 Sched<[WriteVLD2]> { 1388 let Rm = 0b1111; 1389 let Inst{4} = Rn{4}; 1390 let DecoderMethod = "DecodeVLD1DupInstruction"; 1391} 1392def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, 1393 addrmode6dupalignNone>; 1394def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, 1395 addrmode6dupalign16>; 
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

let Predicates = [HasNEON] in {
// An f32 load duplicated into v2f32 selects the 32-bit integer form.
// NOTE(review): the source pattern matches addrmode6dup while the result uses
// addrmode6 -- kept as found.
def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
}

// Register-pair counterpart of VLD1DUP: same parameters, but the destination
// list is VecListDPairAllLanes. No Sched<> is attached to this class.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

let Predicates = [HasNEON] in {
// An f32 load duplicated into v4f32 selects the 32-bit integer pair form.
def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
}

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
//   op7_4    : encoding field forwarded to NLdSt.
//   Dt       : data-type suffix string used in the assembly syntax.
//   AddrMode : operand type of the address operand $Rn.
// NOTE(review): neither record here carries a Sched<> -- kept as found.
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
// Register-pair writeback variants (same parameters as VLD1DUPWB).
// NOTE(review): only _fixed carries Sched<[WriteVLD1]>; _register has none --
// kept as found.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
//   op7_4    : encoding field forwarded to NLdSt.
//   Dt       : data-type suffix string used in the assembly syntax.
//   VdTy     : register-list operand type of $Vd.
//   AddrMode : operand type of the address operand $Rn.
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// Same parameters as VLD2DUP; produces a _fixed ("$Rn!") and a _register
// ("$Rn, $Rm") record, each with a GPR:$wb output tied to $Rn.addr.
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
                              addrmode6dupalign16>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                              addrmode6dupalign32>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                              addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
//   op7_4 : encoding field forwarded to NLdSt.
//   Dt    : data-type suffix string used in the assembly syntax.
// Inst{4} is forced to 0 rather than taken from Rn.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
//   AddrMode : operand type of the address operand $Rn.
// Adds the am6offset:$Rm input and a GPR:$wb output tied to $Rn.addr.
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} =
Rn{4}; 1608 let DecoderMethod = "DecodeVLD4DupInstruction"; 1609} 1610 1611def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; 1612def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; 1613def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1614 1615def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1616def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1617def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1618 1619// ...with double-spaced registers (not used for codegen): 1620def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; 1621def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; 1622def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1623 1624def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1625def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1626def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1627def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1628def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1629def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1630 1631// ...with address register writeback: 1632class VLD4DUPWB<bits<4> op7_4, string Dt> 1633 : NLdSt<1, 0b10, 0b1111, op7_4, 1634 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1635 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, 1636 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", 1637 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 1638 let Inst{4} = Rn{4}; 1639 let DecoderMethod = "DecodeVLD4DupInstruction"; 1640} 1641 1642def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; 1643def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; 1644def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1645 1646def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; 1647def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; 1648def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1649 1650def 
VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1651def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1652def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1653 1654} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1655 1656let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 1657 1658// Classes for VST* pseudo-instructions with multi-register operands. 1659// These are expanded to real instructions after register allocation. 1660class VSTQPseudo<InstrItinClass itin> 1661 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1662class VSTQWBPseudo<InstrItinClass itin> 1663 : PseudoNLdSt<(outs GPR:$wb), 1664 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1665 "$addr.addr = $wb">; 1666class VSTQWBfixedPseudo<InstrItinClass itin> 1667 : PseudoNLdSt<(outs GPR:$wb), 1668 (ins addrmode6:$addr, QPR:$src), itin, 1669 "$addr.addr = $wb">; 1670class VSTQWBregisterPseudo<InstrItinClass itin> 1671 : PseudoNLdSt<(outs GPR:$wb), 1672 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1673 "$addr.addr = $wb">; 1674class VSTQQPseudo<InstrItinClass itin> 1675 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1676class VSTQQWBPseudo<InstrItinClass itin> 1677 : PseudoNLdSt<(outs GPR:$wb), 1678 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1679 "$addr.addr = $wb">; 1680class VSTQQWBfixedPseudo<InstrItinClass itin> 1681 : PseudoNLdSt<(outs GPR:$wb), 1682 (ins addrmode6:$addr, QQPR:$src), itin, 1683 "$addr.addr = $wb">; 1684class VSTQQWBregisterPseudo<InstrItinClass itin> 1685 : PseudoNLdSt<(outs GPR:$wb), 1686 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1687 "$addr.addr = $wb">; 1688 1689class VSTQQQQPseudo<InstrItinClass itin> 1690 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1691class VSTQQQQWBPseudo<InstrItinClass itin> 1692 : PseudoNLdSt<(outs GPR:$wb), 1693 (ins addrmode6:$addr, 
am6offset:$offset, QQQQPR:$src), itin, 1694 "$addr.addr = $wb">; 1695 1696// VST1 : Vector Store (multiple single elements) 1697class VST1D<bits<4> op7_4, string Dt, Operand AddrMode> 1698 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), 1699 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { 1700 let Rm = 0b1111; 1701 let Inst{4} = Rn{4}; 1702 let DecoderMethod = "DecodeVLDST1Instruction"; 1703} 1704class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode> 1705 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), 1706 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { 1707 let Rm = 0b1111; 1708 let Inst{5-4} = Rn{5-4}; 1709 let DecoderMethod = "DecodeVLDST1Instruction"; 1710} 1711 1712def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; 1713def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; 1714def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; 1715def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; 1716 1717def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; 1718def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; 1719def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; 1720def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; 1721 1722// ...with address register writeback: 1723multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1724 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), 1725 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, 1726 "vst1", Dt, "$Vd, $Rn!", 1727 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { 1728 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1729 let Inst{4} = Rn{4}; 1730 let DecoderMethod = "DecodeVLDST1Instruction"; 1731 } 1732 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), 1733 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), 1734 IIC_VLD1u, 1735 "vst1", Dt, "$Vd, $Rn, $Rm", 1736 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { 1737 let Inst{4} = Rn{4}; 1738 let DecoderMethod = "DecodeVLDST1Instruction"; 1739 } 1740} 1741multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1742 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1743 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, 1744 "vst1", Dt, "$Vd, $Rn!", 1745 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1746 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1747 let Inst{5-4} = Rn{5-4}; 1748 let DecoderMethod = "DecodeVLDST1Instruction"; 1749 } 1750 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1751 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), 1752 IIC_VLD1x2u, 1753 "vst1", Dt, "$Vd, $Rn, $Rm", 1754 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1755 let Inst{5-4} = Rn{5-4}; 1756 let DecoderMethod = "DecodeVLDST1Instruction"; 1757 } 1758} 1759 1760defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; 1761defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; 1762defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; 1763defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; 1764 1765defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 1766defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 1767defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 1768defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 1769 1770// ...with 3 registers 1771class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode> 1772 : NLdSt<0, 0b00, 0b0110, op7_4, (outs), 1773 (ins AddrMode:$Rn, VecListThreeD:$Vd), 1774 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { 1775 let Rm = 0b1111; 1776 let Inst{4} = Rn{4}; 1777 let DecoderMethod 
= "DecodeVLDST1Instruction"; 1778} 1779multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1780 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1781 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, 1782 "vst1", Dt, "$Vd, $Rn!", 1783 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 1784 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1785 let Inst{5-4} = Rn{5-4}; 1786 let DecoderMethod = "DecodeVLDST1Instruction"; 1787 } 1788 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1789 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), 1790 IIC_VLD1x3u, 1791 "vst1", Dt, "$Vd, $Rn, $Rm", 1792 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 1793 let Inst{5-4} = Rn{5-4}; 1794 let DecoderMethod = "DecodeVLDST1Instruction"; 1795 } 1796} 1797 1798def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; 1799def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; 1800def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; 1801def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; 1802 1803defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; 1804defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; 1805defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; 1806defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; 1807 1808def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1809def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1810def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1811def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1812def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>; 1813def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>; 1814 1815def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1816def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1817def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1818def 
VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1819def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1820def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1821def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1822def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>; 1823 1824// ...with 4 registers 1825class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode> 1826 : NLdSt<0, 0b00, 0b0010, op7_4, (outs), 1827 (ins AddrMode:$Rn, VecListFourD:$Vd), 1828 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", 1829 []>, Sched<[WriteVST4]> { 1830 let Rm = 0b1111; 1831 let Inst{5-4} = Rn{5-4}; 1832 let DecoderMethod = "DecodeVLDST1Instruction"; 1833} 1834multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1835 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1836 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, 1837 "vst1", Dt, "$Vd, $Rn!", 1838 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1839 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1840 let Inst{5-4} = Rn{5-4}; 1841 let DecoderMethod = "DecodeVLDST1Instruction"; 1842 } 1843 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1844 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1845 IIC_VLD1x4u, 1846 "vst1", Dt, "$Vd, $Rn, $Rm", 1847 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1848 let Inst{5-4} = Rn{5-4}; 1849 let DecoderMethod = "DecodeVLDST1Instruction"; 1850 } 1851} 1852 1853def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 1854def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 1855def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 1856def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 1857 1858defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1859defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1860defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1861defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 1862 1863def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1864def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1865def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1866def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1867def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>; 1868def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>; 1869 1870def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1871def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1872def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1873def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1874def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1875def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1876def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, 
Sched<[WriteVST4]>; 1877def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>; 1878 1879// VST2 : Vector Store (multiple 2-element structures) 1880class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 1881 InstrItinClass itin, Operand AddrMode> 1882 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), 1883 itin, "vst2", Dt, "$Vd, $Rn", "", []> { 1884 let Rm = 0b1111; 1885 let Inst{5-4} = Rn{5-4}; 1886 let DecoderMethod = "DecodeVLDST2Instruction"; 1887} 1888 1889def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, 1890 addrmode6align64or128>, Sched<[WriteVST2]>; 1891def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, 1892 addrmode6align64or128>, Sched<[WriteVST2]>; 1893def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, 1894 addrmode6align64or128>, Sched<[WriteVST2]>; 1895 1896def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, 1897 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1898def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, 1899 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1900def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, 1901 addrmode6align64or128or256>, Sched<[WriteVST4]>; 1902 1903def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1904def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1905def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>; 1906 1907// ...with address register writeback: 1908multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, 1909 RegisterOperand VdTy, Operand AddrMode> { 1910 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1911 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, 1912 "vst2", Dt, "$Vd, $Rn!", 1913 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1914 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1915 let Inst{5-4} = Rn{5-4}; 1916 let DecoderMethod = "DecodeVLDST2Instruction"; 1917 } 1918 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1919 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, 1920 "vst2", Dt, "$Vd, $Rn, $Rm", 1921 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { 1922 let Inst{5-4} = Rn{5-4}; 1923 let DecoderMethod = "DecodeVLDST2Instruction"; 1924 } 1925} 1926multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1927 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1928 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, 1929 "vst2", Dt, "$Vd, $Rn!", 1930 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1931 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1932 let Inst{5-4} = Rn{5-4}; 1933 let DecoderMethod = "DecodeVLDST2Instruction"; 1934 } 1935 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1936 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1937 IIC_VLD1u, 1938 "vst2", Dt, "$Vd, $Rn, $Rm", 1939 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 1940 let Inst{5-4} = Rn{5-4}; 1941 let DecoderMethod = "DecodeVLDST2Instruction"; 1942 } 1943} 1944 1945defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, 1946 addrmode6align64or128>; 1947defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, 1948 addrmode6align64or128>; 1949defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, 1950 addrmode6align64or128>; 1951 1952defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1953defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1954defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1955 1956def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1957def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1958def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1959def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, 
Sched<[WriteVST4]>; 1960def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1961def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>; 1962 1963// ...with double-spaced registers 1964def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, 1965 addrmode6align64or128>; 1966def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, 1967 addrmode6align64or128>; 1968def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, 1969 addrmode6align64or128>; 1970defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, 1971 addrmode6align64or128>; 1972defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, 1973 addrmode6align64or128>; 1974defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, 1975 addrmode6align64or128>; 1976 1977// VST3 : Vector Store (multiple 3-element structures) 1978class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 1979 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1980 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, 1981 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { 1982 let Rm = 0b1111; 1983 let Inst{4} = Rn{4}; 1984 let DecoderMethod = "DecodeVLDST3Instruction"; 1985} 1986 1987def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; 1988def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; 1989def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; 1990 1991def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1992def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1993def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 1994 1995// ...with address register writeback: 1996class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1997 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1998 (ins addrmode6:$Rn, am6offset:$Rm, 1999 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, 2000 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", 2001 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { 
2002 let Inst{4} = Rn{4}; 2003 let DecoderMethod = "DecodeVLDST3Instruction"; 2004} 2005 2006def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; 2007def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; 2008def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; 2009 2010def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2011def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2012def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2013 2014// ...with double-spaced registers: 2015def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; 2016def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; 2017def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; 2018def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; 2019def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; 2020def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; 2021 2022def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2023def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2024def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2025 2026// ...alternate versions to be allocated odd register numbers: 2027def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2028def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2029def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>; 2030 2031def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2032def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2033def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>; 2034 2035// VST4 : Vector Store (multiple 4-element structures) 2036class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 2037 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 2038 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), 2039 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", 2040 "", []>, Sched<[WriteVST4]> { 2041 let Rm = 0b1111; 2042 let Inst{5-4} = Rn{5-4}; 2043 let 
DecoderMethod = "DecodeVLDST4Instruction"; 2044} 2045 2046def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; 2047def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; 2048def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; 2049 2050def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2051def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2052def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 2053 2054// ...with address register writeback: 2055class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2056 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 2057 (ins addrmode6:$Rn, am6offset:$Rm, 2058 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, 2059 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", 2060 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { 2061 let Inst{5-4} = Rn{5-4}; 2062 let DecoderMethod = "DecodeVLDST4Instruction"; 2063} 2064 2065def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; 2066def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; 2067def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; 2068 2069def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2070def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2071def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2072 2073// ...with double-spaced registers: 2074def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; 2075def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; 2076def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; 2077def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; 2078def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; 2079def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; 2080 2081def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2082def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2083def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>; 2084 2085// ...alternate versions to be allocated odd register numbers: 2086def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>; 
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each takes the source register tuple plus a nohash_imm:$lane operand; the
// "WB" variants also take an am6offset and return the updated address in $wb.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// Selection pattern: StoreOp applied to (ExtractOp (Ty $Vd), $lane) at $Rn.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
    Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register pseudo counterpart of VST1LN; carries the same pattern shape on
// a QPR source.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Floating-point lane stores reuse the integer lane-store instructions of the
// same element width.
let Predicates = [HasNEON] in {
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
}

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
// (The Inst{4} assignments repeat what class VST2LN already sets.)
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
// Sched<[WriteVST1]> added to match VST2LN and the VST2LN*_UPD pseudos; this
// class previously had no scheduling write.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

// Sched<[WriteVST2]> added: every other VST3LN pseudo (d-register and _UPD
// variants) carries it, and these two were the only ones without.
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;

// ...with address register writeback:
// Sched<[WriteVST2]> added to match VST3LN and the VST3LN*_UPD pseudos; this
// class previously had no scheduling write.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5}   = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5}   = Rn{5};
}

def VST4LNq16Pseudo :
VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2390def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2391 2392// ...with address register writeback: 2393class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2394 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2395 (ins addrmode6:$Rn, am6offset:$Rm, 2396 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2397 IIC_VST4lnu, "vst4", Dt, 2398 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2399 "$Rn.addr = $wb", []> { 2400 let Inst{4} = Rn{4}; 2401 let DecoderMethod = "DecodeVST4LN"; 2402} 2403 2404def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2405 let Inst{7-5} = lane{2-0}; 2406} 2407def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2408 let Inst{7-6} = lane{1-0}; 2409} 2410def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2411 let Inst{7} = lane{0}; 2412 let Inst{5} = Rn{5}; 2413} 2414 2415def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2416def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2417def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2418 2419def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2420 let Inst{7-6} = lane{1-0}; 2421} 2422def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2423 let Inst{7} = lane{0}; 2424 let Inst{5} = Rn{5}; 2425} 2426 2427def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2428def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2429 2430} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2431 2432// Use vld1/vst1 for unaligned f64 load / store 2433let Predicates = [IsLE,HasNEON] in { 2434def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2435 (VLD1d16 addrmode6:$addr)>; 2436def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2437 (VST1d16 addrmode6:$addr, DPR:$value)>; 2438def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2439 
(VLD1d8 addrmode6:$addr)>; 2440def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2441 (VST1d8 addrmode6:$addr, DPR:$value)>; 2442} 2443let Predicates = [IsBE,HasNEON] in { 2444def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2445 (VLD1d64 addrmode6:$addr)>; 2446def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2447 (VST1d64 addrmode6:$addr, DPR:$value)>; 2448} 2449 2450// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2451// load / store if it's legal. 2452let Predicates = [HasNEON] in { 2453def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), 2454 (VLD1q64 addrmode6:$addr)>; 2455def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2456 (VST1q64 addrmode6:$addr, QPR:$value)>; 2457} 2458let Predicates = [IsLE,HasNEON] in { 2459def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), 2460 (VLD1q32 addrmode6:$addr)>; 2461def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2462 (VST1q32 addrmode6:$addr, QPR:$value)>; 2463def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 2464 (VLD1q16 addrmode6:$addr)>; 2465def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2466 (VST1q16 addrmode6:$addr, QPR:$value)>; 2467def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 2468 (VLD1q8 addrmode6:$addr)>; 2469def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2470 (VST1q8 addrmode6:$addr, QPR:$value)>; 2471} 2472 2473//===----------------------------------------------------------------------===// 2474// NEON pattern fragments 2475//===----------------------------------------------------------------------===// 2476 2477// Extract D sub-registers of Q registers. 
// Each DSubReg_* transform maps an immediate lane number to a D
// sub-register index: the lane number is divided by the constant shown
// (8 for i8, 4 for i16, 2 for i32, none for f64) and added to ARM::dsub_0.
// The assert guards the assumption that dsub_0..dsub_7 are numbered
// consecutively.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t DIdx = N->getZExtValue() / 8;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t DIdx = N->getZExtValue() / 4;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t DIdx = N->getZExtValue() / 2;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t DIdx = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::dsub_0 + DIdx, SDLoc(N), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
// The lane number is used directly as the offset from ARM::ssub_0.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  const uint64_t SIdx = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::ssub_0 + SIdx, SDLoc(N), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers containing a given f16 lane.
// The lane number is halved before being added to ARM::ssub_0.
def SSubReg_f16_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  const uint64_t SIdx = N->getZExtValue() / 2;
  return CurDAG->getTargetConstant(ARM::ssub_0 + SIdx, SDLoc(N), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Each transform keeps only the low bits of the lane number
// (mask 7 for i8, 3 for i16, 1 for i32) and returns them as an i32 target
// constant.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  const uint64_t LaneInD = N->getZExtValue() & 7;
  return CurDAG->getTargetConstant(LaneInD, SDLoc(N), MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  const uint64_t LaneInD = N->getZExtValue() & 3;
  return CurDAG->getTargetConstant(LaneInD, SDLoc(N), MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  const uint64_t LaneInD = N->getZExtValue() & 1;
  return CurDAG->getTargetConstant(LaneInD, SDLoc(N), MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// N2VD: DPR -> DPR, itinerary IIC_VUNAD, sixth N2V argument fixed to 0.
// Selects (ResTy (OpNode (OpTy $Vm))).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
// N2VQ: QPR -> QPR, itinerary IIC_VUNAQ, sixth N2V argument fixed to 1.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// N2VDInt: DPR -> DPR; the itinerary is supplied by the caller and the
// operator may be any SDPatternOperator.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// N2VQInt: QPR -> QPR counterpart (sixth N2V argument is 1).
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Built on N2Vnp; the fifth N2Vnp argument is 0 for the D form.
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

// Q form of N2VDIntnp; the fifth N2Vnp argument is 1.
class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
// op6 is a template parameter here and is forwarded as the fifth N2Vnp
// argument (after op7).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
// $src is tied to $Vd and becomes the first operand of IntOp.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd),
          (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// QPR source, DPR result: (TyD (OpNode (TyQ $Vm))).
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
// Same shape as N2VN, with an SDPatternOperator as the operator.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// DPR source, QPR result: (TyQ (OpNode (TyD $Vm))).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
// Same shape as N2VL, with an SDPatternOperator as the operator.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: $src1 is tied to $Vd and $src2 to
// $Vm.  No selection pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
// Q form; unlike the D form, the itinerary is a template parameter.
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
// N3VD: DPR x DPR -> DPR with format N3RegFrm.  Commutable is forwarded to
// isCommutable.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr,
           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// By-lane (scalar) D-register forms: the second operand of ShOp is a lane of
// $Vm duplicated with ARMvduplane.  Never commutable.
// N3VDSL uses DPR_VFP2 / VectorIndex32 for $Vm / $lane.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// N3VDSL16 uses DPR_8 / VectorIndex16 and a fixed itinerary (IIC_VMULi16D).
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Q-register counterparts of N3VD / N3VDX.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// By-lane Q-register forms: $Vn and the result are QPR (ResTy); the lane
// source $Vm stays a D register (OpTy).
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// The instruction format is supplied by the caller through f.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated D-register 3-register intrinsic (built on N3Vnp).
// The caller-supplied format f is forwarded, matching N3VQIntnp, and
// Commutable is forwarded to isCommutable, matching N3VDInt.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// By-lane D-register intrinsics.  Never commutable; unlike N3VDSL these set
// no TwoOperandAliasConstraint.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Variant with swapped source order: the inputs are ($Vm, $Vn), the assembly
// string is "$Vd, $Vm, $Vn" and the two-operand alias ties $Vm to $Vd.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Q-register counterpart of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated Q-register 3-register intrinsic (built on N3Vnp).
// Commutable is forwarded to isCommutable, matching N3VQInt.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as N3VQIntnp but with Vd as a src register.
// $src is tied to $Vd and is the first of three IntOp operands.  The
// Commutable parameter is accepted but not used by this class.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd),
          (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// By-lane Q-register intrinsics: $Vn and the result are QPR (ResTy); the
// lane source $Vm is a D register (OpTy).  Never commutable.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// Q-register variant with swapped source order: the inputs are ($Vm, $Vn),
// the assembly string is "$Vd, $Vm, $Vn" and the alias ties $Vm to $Vd.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm, QPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
  let TwoOperandAliasConstraint = "$Vm = $Vd";
}

// Multiply-Add/Sub operations: double- and quad-register.
// The accumulator $src1 is tied to $Vd; the pattern is
// (OpNode $src1, (MulOp $Vn, $Vm)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// By-lane D-register multiply-accumulate: the multiplier is a lane of $Vm
// duplicated with ARMvduplane.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

// Q-register multiply-accumulate; same shape as N3VDMulOp.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// By-lane Q-register multiply-accumulate: the accumulator, $Vn and the
// result are QPR (ResTy); the lane source $Vm is a D register (OpTy).
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                        imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// The accumulator $src1 is tied to $Vd; the pattern is
// (OpNode $src1, (IntOp $Vn, $Vm)) with every value typed as Ty.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// Q-register counterpart of N3VDIntOp.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// $src1 is tied to $Vd and is passed as the first of three IntOp operands.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Q-register counterpart of N3VDInt3.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// The accumulator and result are QPR (TyQ); both multiplicands are DPR (TyD).
// $src1 is tied to $Vd.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                                (TyQ (MulOp (TyD DPR:$Vn),
                                            (TyD DPR:$Vm)))))]>;
// By-lane form: the second multiplicand is a lane of $Vm duplicated with
// ARMvduplane.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                 imm:$lane))))))]>;
// Long multiply-accumulate by lane, 16-bit lane variant: $Vm is restricted to
// DPR_8 and the lane operand is a VectorIndex16.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set QPR:$Vd,
                      (OpNode (TyQ QPR:$src1),
                              (TyQ (MulOp (TyD DPR:$Vn),
                                          (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                            imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Pattern: $Vd = OpNode($src1, ExtOp(IntOp($Vn, $Vm))), $src1 tied to $Vd.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
                   SDNode ExtOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$src1),
                        (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// Register form: $Vd = IntOp($src1, $Vn, $Vm) with a Q accumulator and
// D-register sources.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;

// By-lane form, VectorIndex32 lane selector on a DPR_VFP2 register.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$src1),
                                    (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]>;

// By-lane form, VectorIndex16 lane selector on a DPR_8 register.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$src1),
                                    (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Two Q-register sources, one D-register result: $Vd = IntOp($Vn, $Vm).
// The itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm),
          N3RegFrm, IIC_VBINi4D, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources, one Q-register result: $Vd = OpNode($Vn, $Vm).
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// By-lane variant, VectorIndex32 lane selector on a DPR_VFP2 register.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "",
                [(set QPR:$Vd,
                      (TyQ (OpNode (TyD DPR:$Vn),
                                   (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;

// By-lane variant, VectorIndex16 lane selector on a DPR_8 register.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "",
                [(set QPR:$Vd,
                      (TyQ (OpNode (TyD DPR:$Vn),
                                   (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Pattern: $Vd = OpNode(ExtOp($Vn), ExtOp($Vm)); each D-register source is
// extended to the Q type before OpNode is applied.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                        (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Pattern: $Vd = ExtOp(IntOp($Vn, $Vm)); the intrinsic runs on the D type and
// its result is then extended to the Q type.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
                 SDNode ExtOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                        (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Pattern: $Vd = IntOp($Vn, $Vm) with D-register sources and a Q result.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
              bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Long 3-register intrinsic built on N3Vnp: $Vd = IntOp($Vn, $Vm) with
// D-register sources and a Q-register result.
// The Commutable template argument is forwarded to isCommutable, matching
// every other class in this file that accepts it; previously the argument
// was accepted but silently ignored.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs QPR:$Vd),
            (ins DPR:$Vn, DPR:$Vm),
            N3RegFrm, itin, OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long intrinsic by lane, VectorIndex32 lane selector on a DPR_VFP2 register.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]>;

// Long intrinsic by lane, VectorIndex16 lane selector on a DPR_8 register.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin, OpcodeStr, Dt,
                "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]>;

// Wide 3-register operations.
// Pattern: $Vd = OpNode($Vn, ExtOp($Vm)); $Vn is already a Q register and
// only the D-register operand $Vm is extended.  The itinerary is fixed to
// IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vn, DPR:$Vm),
          N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt,
          "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$Vn),
                        (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// Pattern: $Vd = IntOp($Vm).  Both forms use the IIC_VSHLiD itinerary.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Double-register form: $Vd = IntOp($src1, $Vm), with $src1 tied to $Vd.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vm),
          IIC_VPALiD, OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;

// Quad-register form of the same pattern.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vm),
          IIC_VPALiQ, OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
// Pattern: $Vd = OpNode($Vm, $SIMM).  Every class in this group gets the
// two-operand alias constraint "$Vm = $Vd".
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$Vm, ImmTy:$SIMM),
             f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$Vm, ImmTy:$SIMM),
             f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// D-register source, Q-register result: $Vd = OpNode($Vm, $SIMM).
// The pattern matches the immediate through the ImmTy operand itself; the
// format is N2RegVShLFrm and the itinerary is fixed to IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
    : N2VImm<op24, op23, op11_8, op7, op6, op4,
             (outs QPR:$Vd),
             (ins DPR:$Vm, ImmTy:$SIMM),
             N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate.
// Q-register source, D-register result: $Vd = OpNode($Vm, $SIMM), using the
// N2RegVShRFrm format.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
    : N2VImm<op24, op23, op11_8, op7, op6, op4,
             (outs DPR:$Vd),
             (ins QPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (ResTy (OpNode (OpTy QPR:$Vm),
                                  (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Pattern: $Vd = add($src1, ShOp($Vm, $SIMM)), with $src1 tied to $Vd.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt,
             "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set DPR:$Vd,
                   (Ty (add DPR:$src1,
                            (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// NOTE(review): the quad form also uses the D itinerary IIC_VPALiD, whereas
// N2VQPLInt2 uses IIC_VPALiQ -- confirm whether this is intentional.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt,
             "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set QPR:$Vd,
                   (Ty (add QPR:$src1,
                            (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
// Pattern: $Vd = ShOp($src1, $Vm, $SIMM), with $src1 tied to $Vd.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
             f, IIC_VSHLiD, OpcodeStr, Dt,
             "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set DPR:$Vd,
                   (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
             f, IIC_VSHLiQ, OpcodeStr, Dt,
             "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set QPR:$Vd,
                   (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// Pattern: $Vd = IntOp($Vm, $SIMM); the immediate operand is a
// neon_vcvt_imm32 and the format is NVCVTFrm.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
             NVCVTFrm, IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
             NVCVTFrm, IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
// The f32/f16 variants produce an integer vector of the same lane count and
// set Inst{10}; the f16 variants additionally require HasFullFP16.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "8", asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "16", asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, Dt # "32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "8", asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "16", asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, Dt # "32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "16",
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "32",
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, Dt # "64",
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "16",
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "32",
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, Dt # "64",
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, Dt # "8", v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, Dt # "16", v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, Dt # "32", v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, Dt # "8",
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, Dt # "16",
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, Dt # "32",
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "8",
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "16",
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, Dt # "32",
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-lane 3-register operations, 16- and 32-bit elements.  The data-type
// suffixes are the literal strings "i16" and "i32".
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32",
                     v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
    : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
              OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, Dt # "64",
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, Dt # "64",
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, Dt # "16",
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "32",
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "16",
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "32",
                      v4i32, v4i32, IntOp, Commutable>;
}
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "16",
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "32",
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "16",
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "32",
                        v4i32, v4i32, IntOp>;
}

// By-lane 3-register intrinsics, 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, Dt # "16", v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, Dt # "32", v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, Dt # "16", v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
    : N3VInt_HS<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, Dt # "8",
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "8",
                      v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
    : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                  itinD16, itinD32, itinQ16, itinQ32,
                  OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "8",
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "8",
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
    : N3VInt_QHS<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "64",
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "64",
                      v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
    : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                   itinD16, itinD32, itinQ16, itinQ32,
                   OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "64",
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "64",
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, Dt # "16",
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, Dt # "32",
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, Dt # "64",
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, Dt # "8",
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, Dt # "16",
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, Dt # "32",
                   v2i64, v2i32, OpNode, Commutable>;
}

// By-lane long operations; the defs are named after the source vector type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       Dt # "16", v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     Dt # "32", v2i64, v2i32, OpNode>;
}

multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8",
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16",
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32",
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16",
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32",
                      v2i64, v2i32, IntOp, Commutable>;
}

// By-lane long intrinsics; the defs are named after the source vector type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, Dt # "16", v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, Dt # "32", v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
    : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
                 IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8",
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// Instantiates N3VLIntExt for the (result, source) type pairs
// v8i16/v8i8, v4i32/v4i16 and v2i64/v2i32.  The third template argument
// (0b00/0b01/0b10) selects the variant and "8"/"16"/"32" is appended to Dt.
// All three records share the single itinerary `itin`.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector operations (OpNode combined with ExtOp),
//   source operand element sizes of 8, 16 and 32 bits:
// No itinerary parameter is taken here; none is forwarded to N3VW.
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is fixed to `mul`; OpNode is the operation combined with
// it.  The 8-bit records reuse the 16-bit itineraries (itinD16 / itinQ16).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar-lane ("SL") form of the Multiply-Op operations,
//   element sizes of 16 and 32 bits only.
// The 128-bit records additionally pass the 64-bit type (v4i16 / v2i32) to
// the N3VQMulOpSL* classes.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// One itinerary per register width (itinD / itinQ) is shared by all element
// sizes.
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Inherits the 16/32-bit records from N3VInt3_HS and adds the 8-bit ones,
// which reuse the 16-bit itineraries (itinD16 / itinQ16).
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
           :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
                        itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The 8-bit source record uses itin16; MulOp and OpNode are both supplied by
// the caller.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Scalar-lane form of the Long Multiply-Op operations (16 and 32 bits).
// The itineraries are hard-coded to IIC_VMACi16D / IIC_VMACi32D rather than
// being template parameters.  Records are named after the source type.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar-lane form; itineraries are hard-coded to IIC_VMACi16D /
// IIC_VMACi32D and records are named after the source type.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// (the added 8-bit record uses itin16)
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Instantiates N3VLIntExtOp for v8i16/v8i8, v4i32/v4i16 and v2i64/v2i32,
// forwarding IntOp, ExtOp and OpNode unchanged.  All three records share the
// single itinerary `itin`.
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Records are named after the source type; the result type has half as many
// elements of twice the width (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Same type layout as N2VPLInt_QHS, built on the N2V{D,Q}PLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   element sizes of 8, 16, 32 and 64 bits:
// The instruction format is fixed per multiclass: N2VShL_QHSD uses
// N2RegVShLFrm with an i32imm operand, N2VShR_QHSD uses N2RegVShRFrm with
// the shr_imm{8,16,32,64} operands.  The 64-bit element records pass 1
// (instead of 0) as the fourth template argument and leave imm6
// unconstrained.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift counterpart of N2VShL_QHSD.
// NOTE(review): `baseOpc` is accepted but not referenced anywhere in this
// multiclass body.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// All records take a shr_imm{8,16,32,64} immediate operand; no itinerary or
// format is passed by this multiclass.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// N2VShInsL_QHSD fixes the format to N2RegVShLFrm with i32imm and the
// NEONvsliImm node; N2VShInsR_QHSD fixes it to N2RegVShRFrm with
// shr_imm{8,16,32,64} and the NEONvsriImm node.  The data-type suffix is the
// bare element size ("8".."64"); there is no Dt prefix parameter.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                             // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Records are named after the (wide) result type.  The immediate operand
// classes are imm1_7 / imm1_15 / imm1_31 for 8/16/32-bit sources.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Records are named after the (narrow) result type, while the Dt suffix is
// the source element size ("16"/"32"/"64").  The immediate operand class
// follows the result element size (shr_imm8 / shr_imm16 / shr_imm32).
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
// The f16 variants additionally require HasFullFP16.
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                     add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                  v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                  v4f32, v4f32, fadd, 1>;
def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                  v4f16, v4f16, fadd, 1>,
             Requires<[HasNEON,HasFullFP16]>;
def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                  v8f16, v8f16, fadd, 1>,
             Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
// Commutable is 0 here, unlike VADDL above.
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
// Instantiated with null_frag; the explicit Pat<> records below map
// trunc(ARMvshruImm(add ...)) onto the VADDHN records.
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

// The shift amount in each pattern equals the result element width
// (8 / 16 / 32).
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
// The "sl" records are the scalar-lane forms; f16 variants additionally
// require HasFullFP16.
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                    IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                     "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                     "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                  v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                  v4f32, v4f32, fmul, 1>;
def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                  v4f16, v4f16, fmul, 1>,
             Requires<[HasNEON,HasFullFP16]>;
def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                  v8f16, v8f16, fmul, 1>,
             Requires<[HasNEON,HasFullFP16]>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                      v2f32, fmul>;
def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
               Requires<[HasNEON,HasFullFP16]>;
def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                        v4f16, fmul>,
               Requires<[HasNEON,HasFullFP16]>;

let Predicates = [HasNEON] in {
// Multiply by a lane splat taken from a Q register: select the scalar-lane
// instruction on a D subregister of $src2.
// NOTE(review): DSubReg_*_reg / SubReg_*_lane are defined elsewhere;
// presumably they map the Q-register lane index to the containing D
// subregister and the lane index within it -- confirm.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq(v8f16 QPR:$src1),
                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i16_reg imm:$lane))),
                           (SubReg_i16_lane imm:$lane)))>;

// Multiply by a splat of a scalar FP register: insert the scalar at ssub_0
// of an undefined D register and use lane 0 of the scalar-lane instruction.
def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhd DPR:$Rn,
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhq QPR:$Rn,
            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
            (i32 0))>;
}

// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                         IIC_VMULi16Q, IIC_VMULi32Q,
                         "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;

// Lane-splat operand taken from a Q register: same D-subregister rewrite as
// the VMUL patterns above.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
}

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                          IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                          "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
}

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
// The non-lane records share an explicit post-encoder method and decoder
// namespace; VMULLp64 additionally requires HasV8 and HasCrypto.
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
  defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
  def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                           "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                 Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                          "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
4414 4415// VMLA : Vector Multiply Accumulate (integer and floating-point) 4416defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4417 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4418def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", 4419 v2f32, fmul_su, fadd_mlx>, 4420 Requires<[HasNEON, UseFPVMLx]>; 4421def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", 4422 v4f32, fmul_su, fadd_mlx>, 4423 Requires<[HasNEON, UseFPVMLx]>; 4424def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", 4425 v4f16, fmul_su, fadd_mlx>, 4426 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4427def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", 4428 v8f16, fmul_su, fadd_mlx>, 4429 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4430defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, 4431 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4432def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", 4433 v2f32, fmul_su, fadd_mlx>, 4434 Requires<[HasNEON, UseFPVMLx]>; 4435def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", 4436 v4f32, v2f32, fmul_su, fadd_mlx>, 4437 Requires<[HasNEON, UseFPVMLx]>; 4438def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16", 4439 v4f16, fmul, fadd>, 4440 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4441def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16", 4442 v8f16, v4f16, fmul, fadd>, 4443 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4444 4445let Predicates = [HasNEON] in { 4446def : Pat<(v8i16 (add (v8i16 QPR:$src1), 4447 (mul (v8i16 QPR:$src2), 4448 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))), 4449 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4450 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4451 (DSubReg_i16_reg imm:$lane))), 4452 (SubReg_i16_lane imm:$lane)))>; 4453 4454def : Pat<(v4i32 (add (v4i32 QPR:$src1), 4455 (mul (v4i32 QPR:$src2), 4456 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 
imm:$lane))))), 4457 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4458 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4459 (DSubReg_i32_reg imm:$lane))), 4460 (SubReg_i32_lane imm:$lane)))>; 4461} 4462 4463def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), 4464 (fmul_su (v4f32 QPR:$src2), 4465 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))), 4466 (v4f32 (VMLAslfq (v4f32 QPR:$src1), 4467 (v4f32 QPR:$src2), 4468 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4469 (DSubReg_i32_reg imm:$lane))), 4470 (SubReg_i32_lane imm:$lane)))>, 4471 Requires<[HasNEON, UseFPVMLx]>; 4472 4473// VMLAL : Vector Multiply Accumulate Long (Q += D * D) 4474defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4475 "vmlal", "s", NEONvmulls, add>; 4476defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4477 "vmlal", "u", NEONvmullu, add>; 4478 4479defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; 4480defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; 4481 4482let Predicates = [HasNEON, HasV8_1a] in { 4483 // v8.1a Neon Rounding Double Multiply-Op vector operations, 4484 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate 4485 // Returning High Half (D += D * D, Q += Q * Q; not a long op) 4486 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, 4487 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4488 null_frag>; 4489 def : Pat<(v4i16 (int_arm_neon_vqadds 4490 (v4i16 DPR:$src1), 4491 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4492 (v4i16 DPR:$Vm))))), 4493 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4494 def : Pat<(v2i32 (int_arm_neon_vqadds 4495 (v2i32 DPR:$src1), 4496 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4497 (v2i32 DPR:$Vm))))), 4498 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4499 def : Pat<(v8i16 (int_arm_neon_vqadds 4500 (v8i16 QPR:$src1), 4501 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4502 (v8i16 QPR:$Vm))))), 4503 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4504 def : 
Pat<(v4i32 (int_arm_neon_vqadds 4505 (v4i32 QPR:$src1), 4506 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4507 (v4i32 QPR:$Vm))))), 4508 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4509 4510 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, 4511 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4512 null_frag>; 4513 def : Pat<(v4i16 (int_arm_neon_vqadds 4514 (v4i16 DPR:$src1), 4515 (v4i16 (int_arm_neon_vqrdmulh 4516 (v4i16 DPR:$Vn), 4517 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4518 imm:$lane)))))), 4519 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, 4520 imm:$lane))>; 4521 def : Pat<(v2i32 (int_arm_neon_vqadds 4522 (v2i32 DPR:$src1), 4523 (v2i32 (int_arm_neon_vqrdmulh 4524 (v2i32 DPR:$Vn), 4525 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4526 imm:$lane)))))), 4527 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4528 imm:$lane))>; 4529 def : Pat<(v8i16 (int_arm_neon_vqadds 4530 (v8i16 QPR:$src1), 4531 (v8i16 (int_arm_neon_vqrdmulh 4532 (v8i16 QPR:$src2), 4533 (v8i16 (ARMvduplane (v8i16 QPR:$src3), 4534 imm:$lane)))))), 4535 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), 4536 (v8i16 QPR:$src2), 4537 (v4i16 (EXTRACT_SUBREG 4538 QPR:$src3, 4539 (DSubReg_i16_reg imm:$lane))), 4540 (SubReg_i16_lane imm:$lane)))>; 4541 def : Pat<(v4i32 (int_arm_neon_vqadds 4542 (v4i32 QPR:$src1), 4543 (v4i32 (int_arm_neon_vqrdmulh 4544 (v4i32 QPR:$src2), 4545 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 4546 imm:$lane)))))), 4547 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), 4548 (v4i32 QPR:$src2), 4549 (v2i32 (EXTRACT_SUBREG 4550 QPR:$src3, 4551 (DSubReg_i32_reg imm:$lane))), 4552 (SubReg_i32_lane imm:$lane)))>; 4553 4554 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract 4555 // Returning High Half (D -= D * D, Q -= Q * Q; not a long op) 4556 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, 4557 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4558 null_frag>; 4559 def : Pat<(v4i16 (int_arm_neon_vqsubs 4560 (v4i16 DPR:$src1), 4561 (v4i16 (int_arm_neon_vqrdmulh 
(v4i16 DPR:$Vn), 4562 (v4i16 DPR:$Vm))))), 4563 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4564 def : Pat<(v2i32 (int_arm_neon_vqsubs 4565 (v2i32 DPR:$src1), 4566 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4567 (v2i32 DPR:$Vm))))), 4568 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4569 def : Pat<(v8i16 (int_arm_neon_vqsubs 4570 (v8i16 QPR:$src1), 4571 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4572 (v8i16 QPR:$Vm))))), 4573 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4574 def : Pat<(v4i32 (int_arm_neon_vqsubs 4575 (v4i32 QPR:$src1), 4576 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4577 (v4i32 QPR:$Vm))))), 4578 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4579 4580 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, 4581 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4582 null_frag>; 4583 def : Pat<(v4i16 (int_arm_neon_vqsubs 4584 (v4i16 DPR:$src1), 4585 (v4i16 (int_arm_neon_vqrdmulh 4586 (v4i16 DPR:$Vn), 4587 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4588 imm:$lane)))))), 4589 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; 4590 def : Pat<(v2i32 (int_arm_neon_vqsubs 4591 (v2i32 DPR:$src1), 4592 (v2i32 (int_arm_neon_vqrdmulh 4593 (v2i32 DPR:$Vn), 4594 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4595 imm:$lane)))))), 4596 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4597 imm:$lane))>; 4598 def : Pat<(v8i16 (int_arm_neon_vqsubs 4599 (v8i16 QPR:$src1), 4600 (v8i16 (int_arm_neon_vqrdmulh 4601 (v8i16 QPR:$src2), 4602 (v8i16 (ARMvduplane (v8i16 QPR:$src3), 4603 imm:$lane)))))), 4604 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), 4605 (v8i16 QPR:$src2), 4606 (v4i16 (EXTRACT_SUBREG 4607 QPR:$src3, 4608 (DSubReg_i16_reg imm:$lane))), 4609 (SubReg_i16_lane imm:$lane)))>; 4610 def : Pat<(v4i32 (int_arm_neon_vqsubs 4611 (v4i32 QPR:$src1), 4612 (v4i32 (int_arm_neon_vqrdmulh 4613 (v4i32 QPR:$src2), 4614 (v4i32 (ARMvduplane (v4i32 QPR:$src3), 4615 imm:$lane)))))), 4616 (v4i32 (VQRDMLSHslv4i32 
(v4i32 QPR:$src1), 4617 (v4i32 QPR:$src2), 4618 (v2i32 (EXTRACT_SUBREG 4619 QPR:$src3, 4620 (DSubReg_i32_reg imm:$lane))), 4621 (SubReg_i32_lane imm:$lane)))>; 4622} 4623// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) 4624defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4625 "vqdmlal", "s", null_frag>; 4626defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4627 4628let Predicates = [HasNEON] in { 4629def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4630 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4631 (v4i16 DPR:$Vm))))), 4632 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4633def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4634 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4635 (v2i32 DPR:$Vm))))), 4636 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4637def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4638 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4639 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4640 imm:$lane)))))), 4641 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4642def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4643 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4644 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4645 imm:$lane)))))), 4646 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4647} 4648 4649// VMLS : Vector Multiply Subtract (integer and floating-point) 4650defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4651 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4652def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4653 v2f32, fmul_su, fsub_mlx>, 4654 Requires<[HasNEON, UseFPVMLx]>; 4655def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4656 v4f32, fmul_su, fsub_mlx>, 4657 Requires<[HasNEON, UseFPVMLx]>; 4658def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", 4659 v4f16, fmul, fsub>, 4660 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4661def VMLShq 
: N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", 4662 v8f16, fmul, fsub>, 4663 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4664defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4665 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4666def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4667 v2f32, fmul_su, fsub_mlx>, 4668 Requires<[HasNEON, UseFPVMLx]>; 4669def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4670 v4f32, v2f32, fmul_su, fsub_mlx>, 4671 Requires<[HasNEON, UseFPVMLx]>; 4672def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", 4673 v4f16, fmul, fsub>, 4674 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4675def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", 4676 v8f16, v4f16, fmul, fsub>, 4677 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4678 4679let Predicates = [HasNEON] in { 4680def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4681 (mul (v8i16 QPR:$src2), 4682 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))), 4683 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4684 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4685 (DSubReg_i16_reg imm:$lane))), 4686 (SubReg_i16_lane imm:$lane)))>; 4687 4688def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4689 (mul (v4i32 QPR:$src2), 4690 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))), 4691 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4692 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4693 (DSubReg_i32_reg imm:$lane))), 4694 (SubReg_i32_lane imm:$lane)))>; 4695} 4696 4697def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4698 (fmul_su (v4f32 QPR:$src2), 4699 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))), 4700 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4701 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4702 (DSubReg_i32_reg imm:$lane))), 4703 (SubReg_i32_lane imm:$lane)))>, 4704 Requires<[HasNEON, UseFPVMLx]>; 4705 4706// VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4707defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, 
IIC_VMACi32D, 4708 "vmlsl", "s", NEONvmulls, sub>; 4709defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4710 "vmlsl", "u", NEONvmullu, sub>; 4711 4712defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; 4713defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; 4714 4715// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4716defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4717 "vqdmlsl", "s", null_frag>; 4718defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; 4719 4720let Predicates = [HasNEON] in { 4721def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4722 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4723 (v4i16 DPR:$Vm))))), 4724 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4725def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4726 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4727 (v2i32 DPR:$Vm))))), 4728 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4729def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4730 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4731 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), 4732 imm:$lane)))))), 4733 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4734def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4735 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4736 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), 4737 imm:$lane)))))), 4738 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4739} 4740 4741// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
4742def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4743 v2f32, fmul_su, fadd_mlx>, 4744 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4745 4746def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4747 v4f32, fmul_su, fadd_mlx>, 4748 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4749def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", 4750 v4f16, fmul, fadd>, 4751 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4752 4753def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", 4754 v8f16, fmul, fadd>, 4755 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4756 4757// Fused Vector Multiply Subtract (floating-point) 4758def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4759 v2f32, fmul_su, fsub_mlx>, 4760 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4761def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4762 v4f32, fmul_su, fsub_mlx>, 4763 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4764def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", 4765 v4f16, fmul, fsub>, 4766 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4767def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", 4768 v8f16, fmul, fsub>, 4769 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4770 4771// Match @llvm.fma.* intrinsics 4772def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4773 (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4774 Requires<[HasNEON,HasFullFP16]>; 4775def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4776 (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4777 Requires<[HasNEON,HasFullFP16]>; 4778def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4779 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4780 Requires<[HasNEON,HasVFP4]>; 4781def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4782 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4783 Requires<[HasNEON,HasVFP4]>; 4784def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4785 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4786 
Requires<[HasNEON,HasVFP4]>; 4787def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4788 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4789 Requires<[HasNEON,HasVFP4]>; 4790 4791// ARMv8.2a dot product instructions. 4792// We put them in the VFPV8 decoder namespace because the ARM and Thumb 4793// encodings are the same and thus no further bit twiddling is necessary 4794// in the disassembler. 4795class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy, 4796 ValueType AccumTy, ValueType InputTy, 4797 SDPatternOperator OpNode> : 4798 N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst), 4799 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, 4800 Asm, AsmTy, 4801 [(set (AccumTy RegTy:$dst), 4802 (OpNode (AccumTy RegTy:$Vd), 4803 (InputTy RegTy:$Vn), 4804 (InputTy RegTy:$Vm)))]> { 4805 let Predicates = [HasDotProd]; 4806 let DecoderNamespace = "VFPV8"; 4807 let Constraints = "$dst = $Vd"; 4808} 4809 4810def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>; 4811def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>; 4812def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>; 4813def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>; 4814 4815// Indexed dot product instructions: 4816multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty, 4817 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode, 4818 dag RHS> { 4819 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst), 4820 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 4821 N3RegFrm, IIC_VDOTPROD, opc, dt, []> { 4822 bit lane; 4823 let Inst{5} = lane; 4824 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); 4825 let Constraints = "$dst = $Vd"; 4826 let Predicates = [HasDotProd]; 4827 let DecoderNamespace = "VFPV8"; 4828 } 4829 4830 def : Pat< 4831 (AccumType (OpNode (AccumType Ty:$Vd), 4832 (InputType Ty:$Vn), 4833 (InputType 
(bitconvert (AccumType 4834 (ARMvduplane (AccumType Ty:$Vm), 4835 VectorIndex32:$lane)))))), 4836 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>; 4837} 4838 4839defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8, 4840 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>; 4841defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8, 4842 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>; 4843defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8, 4844 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4845defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8, 4846 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4847 4848 4849// ARMv8.3 complex operations 4850class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q, 4851 InstrItinClass itin, dag oops, dag iops, 4852 string opc, string dt, list<dag> pattern> 4853 : N3VCP8<{?,?}, {op21,s}, q, op4, oops, 4854 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ 4855 bits<2> rot; 4856 let Inst{24-23} = rot; 4857} 4858 4859class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q, 4860 InstrItinClass itin, dag oops, dag iops, string opc, 4861 string dt, list<dag> pattern> 4862 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, 4863 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { 4864 bits<1> rot; 4865 let Inst{24} = rot; 4866} 4867 4868class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin, 4869 dag oops, dag iops, string opc, string dt, 4870 list<dag> pattern> 4871 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4872 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { 4873 bits<2> rot; 4874 bit lane; 4875 4876 let Inst{21-20} = rot; 4877 let Inst{5} = lane; 4878} 4879 4880class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin, 4881 dag oops, dag iops, string opc, string dt, 4882 list<dag> pattern> 4883 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4884 "$Vd, $Vn, $Vm$lane, $rot", 
"$src1 = $Vd", pattern> { 4885 bits<2> rot; 4886 bit lane; 4887 4888 let Inst{21-20} = rot; 4889 let Inst{5} = Vm{4}; 4890 // This is needed because the lane operand does not have any bits in the 4891 // encoding (it only has one possible value), so we need to manually set it 4892 // to its default value. 4893 let DecoderMethod = "DecodeNEONComplexLane64Instruction"; 4894} 4895 4896multiclass N3VCP8ComplexTied<bit op21, bit op4, 4897 string OpcodeStr, SDPatternOperator Op> { 4898 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4899 def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd), 4900 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 4901 OpcodeStr, "f16", []>; 4902 def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd), 4903 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 4904 OpcodeStr, "f16", []>; 4905 } 4906 let Predicates = [HasNEON,HasV8_3a] in { 4907 def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd), 4908 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 4909 OpcodeStr, "f32", []>; 4910 def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd), 4911 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 4912 OpcodeStr, "f32", []>; 4913 } 4914} 4915 4916multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4, 4917 string OpcodeStr, SDPatternOperator Op> { 4918 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4919 def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD, 4920 (outs DPR:$Vd), 4921 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), 4922 OpcodeStr, "f16", []>; 4923 def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ, 4924 (outs QPR:$Vd), 4925 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 4926 OpcodeStr, "f16", []>; 4927 } 4928 let Predicates = [HasNEON,HasV8_3a] in { 4929 def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD, 4930 (outs DPR:$Vd), 4931 (ins DPR:$Vn, DPR:$Vm, 
complexrotateopodd:$rot), 4932 OpcodeStr, "f32", []>; 4933 def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ, 4934 (outs QPR:$Vd), 4935 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 4936 OpcodeStr, "f32", []>; 4937 } 4938} 4939 4940// These instructions index by pairs of lanes, so the VectorIndexes are twice 4941// as wide as the data types. 4942multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr, 4943 SDPatternOperator Op> { 4944 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4945 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD, 4946 (outs DPR:$Vd), 4947 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4948 VectorIndex32:$lane, complexrotateop:$rot), 4949 OpcodeStr, "f16", []>; 4950 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ, 4951 (outs QPR:$Vd), 4952 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, 4953 VectorIndex32:$lane, complexrotateop:$rot), 4954 OpcodeStr, "f16", []>; 4955 } 4956 let Predicates = [HasNEON,HasV8_3a] in { 4957 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD, 4958 (outs DPR:$Vd), 4959 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 4960 complexrotateop:$rot), 4961 OpcodeStr, "f32", []>; 4962 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ, 4963 (outs QPR:$Vd), 4964 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 4965 complexrotateop:$rot), 4966 OpcodeStr, "f32", []>; 4967 } 4968} 4969 4970defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; 4971defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; 4972defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; 4973 4974// Vector Subtract Operations. 
4975 4976// VSUB : Vector Subtract (integer and floating-point) 4977defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, 4978 "vsub", "i", sub, 0>; 4979def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", 4980 v2f32, v2f32, fsub, 0>; 4981def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", 4982 v4f32, v4f32, fsub, 0>; 4983def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16", 4984 v4f16, v4f16, fsub, 0>, 4985 Requires<[HasNEON,HasFullFP16]>; 4986def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", 4987 v8f16, v8f16, fsub, 0>, 4988 Requires<[HasNEON,HasFullFP16]>; 4989// VSUBL : Vector Subtract Long (Q = D - D) 4990defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 4991 "vsubl", "s", sub, sext, 0>; 4992defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 4993 "vsubl", "u", sub, zext, 0>; 4994// VSUBW : Vector Subtract Wide (Q = Q - D) 4995defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; 4996defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; 4997// VHSUB : Vector Halving Subtract 4998defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, 4999 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5000 "vhsub", "s", int_arm_neon_vhsubs, 0>; 5001defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, 5002 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5003 "vhsub", "u", int_arm_neon_vhsubu, 0>; 5004// VQSUB : Vector Saturating Subtract 5005defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, 5006 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5007 "vqsub", "s", int_arm_neon_vqsubs, 0>; 5008defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, 5009 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5010 "vqsub", "u", int_arm_neon_vqsubu, 0>; 5011// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) 5012defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; 5013// VRSUBHN : Vector Rounding Subtract and Narrow Returning High 
Half (D=Q-Q) 5014defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", 5015 int_arm_neon_vrsubhn, 0>; 5016 5017let Predicates = [HasNEON] in { 5018def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), 5019 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; 5020def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), 5021 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; 5022def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), 5023 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; 5024} 5025 5026// Vector Comparisons. 5027 5028// VCEQ : Vector Compare Equal 5029defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5030 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; 5031def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, 5032 NEONvceq, 1>; 5033def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, 5034 NEONvceq, 1>; 5035def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, 5036 NEONvceq, 1>, 5037 Requires<[HasNEON, HasFullFP16]>; 5038def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, 5039 NEONvceq, 1>, 5040 Requires<[HasNEON, HasFullFP16]>; 5041 5042let TwoOperandAliasConstraint = "$Vm = $Vd" in 5043defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", 5044 "$Vd, $Vm, #0", NEONvceqz>; 5045 5046// VCGE : Vector Compare Greater Than or Equal 5047defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5048 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; 5049defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5050 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; 5051def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, 5052 NEONvcge, 0>; 5053def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, 5054 NEONvcge, 0>; 5055def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, 5056 NEONvcge, 0>, 5057 Requires<[HasNEON, HasFullFP16]>; 5058def 
VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, 5059 NEONvcge, 0>, 5060 Requires<[HasNEON, HasFullFP16]>; 5061 5062let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5063defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", 5064 "$Vd, $Vm, #0", NEONvcgez>; 5065defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", 5066 "$Vd, $Vm, #0", NEONvclez>; 5067} 5068 5069// VCGT : Vector Compare Greater Than 5070defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5071 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; 5072defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5073 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; 5074def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, 5075 NEONvcgt, 0>; 5076def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, 5077 NEONvcgt, 0>; 5078def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, 5079 NEONvcgt, 0>, 5080 Requires<[HasNEON, HasFullFP16]>; 5081def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, 5082 NEONvcgt, 0>, 5083 Requires<[HasNEON, HasFullFP16]>; 5084 5085let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5086defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", 5087 "$Vd, $Vm, #0", NEONvcgtz>; 5088defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", 5089 "$Vd, $Vm, #0", NEONvcltz>; 5090} 5091 5092// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) 5093def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 5094 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; 5095def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 5096 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; 5097def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 5098 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>, 5099 Requires<[HasNEON, HasFullFP16]>; 5100def VACGEhq : N3VQInt<1, 0, 0b01, 
0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 5101 "f16", v8i16, v8f16, int_arm_neon_vacge, 0>, 5102 Requires<[HasNEON, HasFullFP16]>; 5103// VACGT : Vector Absolute Compare Greater Than (aka VCAGT) 5104def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", 5105 "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>; 5106def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", 5107 "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>; 5108def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", 5109 "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>, 5110 Requires<[HasNEON, HasFullFP16]>; 5111def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", 5112 "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>, 5113 Requires<[HasNEON, HasFullFP16]>; 5114// VTST : Vector Test Bits 5115defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 5116 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; 5117 // VACLT / VACLE : assembler aliases for VACGT / VACGE with $Vn and $Vm swapped 5118def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 5119 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5120def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 5121 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5122def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 5123 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5124def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 5125 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5126let Predicates = [HasNEON, HasFullFP16] in { 5127def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", 5128 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5129def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", 5130 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5131def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", 5132 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 5133def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", 5134 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 5135} 5136 5137// +fp16fml Floating Point Multiplication Variants 5138let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in { 5139 
// FP16 three-register coprocessor-8 form with the third template argument of
// N3VCP8 fixed to 1 (used below for the Q-register destination variants).
class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
           asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Same operand shape as above, but built on N3VCP8Q0 with the third template
// argument fixed to 0 (used below for the D-register destination variants).
class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
             asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Vd, Vs, Vs[0-15], Idx[0-1]
// The one-bit lane index is encoded in Inst{3}; Vn is split across
// Inst{19-16} and Inst{7}, Vm across Inst{5} and Inst{2-0}.
class VFMD<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bit idx;
  let Inst{3}     = idx;
  let Inst{19-16} = Vn{4-1};
  let Inst{7}     = Vn{0};
  let Inst{5}     = Vm{0};
  let Inst{2-0}   = Vm{3-1};
}

// Vq, Vd, Vd[0-7], Idx[0-3]
// The two-bit lane index is split: high bit in Inst{5}, low bit in Inst{3}.
// NOTE(review): this Q-destination form uses the IIC_VMACD itinerary, same as
// VFMD above -- confirm that is intended.
class VFMQ<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
}

let hasNoSchedulingInfo = 1 in {
//                                                op1   op2   op3
def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
}
} // HasNEON, HasFP16FML


// Two-operand vaclt/vacle assembler aliases. Each maps onto the matching
// VACGT/VACGE record with $Vm as the second operand and $Vd as the third.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
}

// Vector Bitwise Operations.

// Bitwise NOT expressed as XOR with an all-ones vector (D and Q widths).
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND     : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR (immediate). The '?' bits of the third template argument are filled
// from $SIMM in each record body; $src is tied to $Vd.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC     : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                 (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                 (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate). Same layout as the VORR immediate records above.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN     : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;

// VMVN     : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN     : Vector Bitwise NOT
// NOTE(review): VMVNq is given the D-register itinerary IIC_VSUBiD, the same
// as VMVNd -- confirm whether a Q itinerary was intended.
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
}

// VBSL     : Vector Bitwise Select
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
// The vbsl intrinsic selects VBSLd for every 64-bit element type.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

// (Vn & Vd) | (Vm & ~Vd) is also selected as VBSLd with Vd as the mask.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

let Predicates = [HasNEON] in {
// The vbsl intrinsic selects VBSLq for every 128-bit element type.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}

// VBIF     : Vector Bitwise Insert if False
//            like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// No selection pattern is attached (empty pattern list); $src1 is tied to $Vd.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT     : Vector Bitwise Insert if True
//            like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.
// VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
// The floating-point forms reuse the int_arm_neon_vabds intrinsic.
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// NOTE(review): the signed variant also passes zext as the extension operator;
// presumably because an absolute difference is non-negative -- confirm.
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

let Predicates = [HasNEON] in {
// abs(zext(a) - zext(b)) is selected as the unsigned VABDL.
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.
// Signed right shift (by $shift) of the difference of two zero-extended
// operands.
def abd_shr :
    PatFrag<(ops node:$in1, node:$in2, node:$shift),
            (ARMvshrsImm (sub (zext node:$in1),
                              (zext node:$in2)), (i32 $shift))>;

let Predicates = [HasNEON] in {
// xor(sign, diff + sign), with sign = abd_shr(a, b, 63), selects VABDLuv2i64.
def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                                   (zext (v2i32 DPR:$opB))),
                                              (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
}

// VABA     : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.
// VMAX     : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", smax, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", umax, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, fmaximum, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, fmaximum, 1>;
def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f16",
                        v4f16, v4f16, fmaximum, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f16",
                        v8f16, v8f16, fmaximum, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v2f32, v2f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v4f32, v4f32, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v4f16, v4f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v8f16, v8f16, fmaxnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN     : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", smin, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", umin, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, fminimum, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, fminimum, 1>;
def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f16",
                        v4f16, v4f16, fminimum, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f16",
                        v8f16, v8f16, fminimum, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v2f32, v2f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v4f32, v4f32, fminnum, 1>,
                       Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v4f16, v4f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v8f16, v8f16, fminnum, 1>,
                       Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.
// VPADD    : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f16",
                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL   : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX    : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE   : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;
def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f16",
                        v4f16, v4f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f16",
                        v8f16, v8f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;

// VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f16",
                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f16",
                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS  : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f16",
                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f16",
                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.
// VSHL     : Vector Shift
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                              IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                              "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                              IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                              "vshl", "u", int_arm_neon_vshiftu>;

let Predicates = [HasNEON] in {
// ARMvshls nodes select the signed register-shift records.
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

// ARMvshlu nodes select the unsigned register-shift records.
def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL     : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            ARMvshrsImm>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            ARMvshruImm>;

// VSHLL    : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL    : Vector Shift Left Long (with maximum shift count)
// Built on N2VLSh with null_frag as the operator; Inst{21-16} is fixed from
// op21_16 and decoding goes through DecodeVSHLMaxInstruction. The Pat<>
// records that follow select these instructions explicitly.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

let Predicates = [HasNEON] in {
// A left shift by the full source element width selects VSHLLi* whether the
// operand was zero-, sign- or any-extended.
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN    : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           PatFrag<(ops node:$Rn, node:$amt),
                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

let Predicates = [HasNEON] in {
// Truncation of an unsigned right shift selects the same VSHRN records.
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

// VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrsImm>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshruImm>;

// VRSHRN   : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrnImm>;

// VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;

// VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;

// VQSHRN   : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrnsImm>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnuImm>;

// VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsuImm>;

// VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnuImm>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsuImm>;

// VSRA     : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA    : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;

// VSLI     : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI     : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS     : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v4f32, v4f32, fabs>;
def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v4f16, v4f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v8f16, v8f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;

// VQABS    : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.
// Integer negate expressed as a subtraction from an all-zeros vector.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// D- and Q-register integer negate templates; `size` selects the element
// width field and `Ty` the vector type matched through vnegd / vnegq.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG     : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG     : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;
def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
}

// VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS     : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ     : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT     : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are outputs; each is tied to the corresponding input.
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.

// VMOV     : Vector Move (Register)
// The register form is an assembler alias: it is emitted as VORR with the
// source register used for both inputs.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV     : Vector Move (Immediate)

// VMOVs are not, strictly speaking, cheap, but they cost the same as the
// register copy they would replace (VORR), so rematerialization should be
// preferred over splitting whenever it applies.
let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
             (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
             "vmov", "i8", "$Vd, $SIMM", "",
             [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv16i8
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
             (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
             "vmov", "i8", "$Vd, $SIMM", "",
             [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;

// For the i16 forms one bit of the mode field is taken from the immediate.
def VMOVv4i16
  : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
             (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
             "vmov", "i16", "$Vd, $SIMM", "",
             [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16
  : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
             (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
             "vmov", "i16", "$Vd, $SIMM", "",
             [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// For the i32 forms the whole four-bit mode field comes from the immediate.
def VMOVv2i32
  : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
             (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
             "vmov", "i32", "$Vd, $SIMM", "",
             [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32
  : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
             (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
             "vmov", "i32", "$Vd, $SIMM", "",
             [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
             (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
             "vmov", "i64", "$Vd, $SIMM", "",
             [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2i64
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
             (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
             "vmov", "i64", "$Vd, $SIMM", "",
             [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;

def VMOVv2f32
  : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
             (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
             "vmov", "f32", "$Vd, $SIMM", "",
             [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32
  : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
             (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
             "vmov", "f32", "$Vd, $SIMM", "",
             [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Byte-replication aliases, provided for GAS compatibility.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // Instructions such as
  //    "vmov.i32 d0, #0xffffffff"
  //    "vmov.i32 d0, #0xabababab"
  //    "vmov.i16 d0, #0xabab"
  // are incorrect as written, but can still be handled.  The last two, for
  // example, are emitted as
  //    "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd,
                                nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd,
                                 nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // The same is done for VMVN, which is folded into a VMOV of the inverted
  // byte.  So
  //    "vmvn.i32 d0, #0xabababab"
  // actually means
  //    "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd,
                                nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd,
                                 nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// The same idea for element types other than i8, e.g.:
//    "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
//    "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// Here VMVN is not canonicalized to VMOV: the vmvn aliases map onto the
// NV8/NV16 instructions with the same replicated immediate.
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd,
                          nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd,
                           nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd,
                           nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd,
                            nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                 VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0x5500"

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they
// should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

// Both pseudos expand to the i32 VMOV-immediate with immediate 0 and are
// only available when HasZCZ holds.
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0
    : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                      [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
                      (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
      Requires<[HasZCZ]>;
  def VMOVQ0
    : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                      [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
                      (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
      Requires<[HasZCZ]>;
}

// VMOV     : Vector Get Lane (move scalar to ARM core register)

// In each definition the lane index is split across Inst{21} and the low
// opcode bits, as spelled out in the record body.
def VGETLNs8
  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
              (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
              IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
              [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                          imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
              (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
              IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
              [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                          imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8
  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
              (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
              IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
              [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                          imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16
  : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
              (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
              IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
              [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                          imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
              (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
              IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
              [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                        imm:$lane))]>,
    Requires<[HasFPRegs, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}

// Q-register sources: pick the D sub-register that holds the lane, then use
// the D-register instruction with the lane index translated accordingly.
let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
}
// 32-bit extracts: VGETLNi32 when HasFastVGETLNi32 holds, otherwise a
// sub-register extract copied into GPR.
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))),
            GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))),
            GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
let Predicates = [HasNEON] in {
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
}

// Immediate predicates that split lane indices by parity.
def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
def imm_odd  : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;

// f16 extracts: even lanes are a plain sub-register extract; odd lanes are
// passed through VMOVH and then copied into HPR.
let Predicates = [HasNEON] in {
def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
            (SSubReg_f16_reg imm_even:$lane))>;

def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
          (COPY_TO_REGCLASS
            (VMOVH (EXTRACT_SUBREG
                     (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
                     (SSubReg_f16_reg imm_odd:$lane))),
            HPR)>;

def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
            (SSubReg_f16_reg imm_even:$lane))>;

def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
          (COPY_TO_REGCLASS
            (VMOVH (EXTRACT_SUBREG
                     (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
                     (SSubReg_f16_reg imm_odd:$lane))),
            HPR)>;
}

// VMOV     : Vector Set Lane (move ARM core register to scalar)

// The destination is tied to $src1, so the lanes that are not written keep
// their previous contents.
let Constraints = "$src1 = $V" in {
def VSETLNi8
  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
              (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
              IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
              [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
              (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
              IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
              [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
              (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
              IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
              [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                       GPR:$R, imm:$lane))]>,
    Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent to
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}

let Predicates = [HasNEON] in {
// Q-register inserts: extract the affected D half, set the lane in it, and
// insert the half back into the original Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

// f16 inserts go through VMOVRH and the 16-bit set-lane instruction.
def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
          (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
          (v8f16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                             (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: the scalar lands in element 0 of an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
}

// VDUP     : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8",  v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
// Only the D-register 32-bit form is gated on HasFastVDUP32.
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
              Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8",  v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// ARMvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
      Requires<[HasNEON]>;

// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// With HasSlowVDUP32 the 64-bit splat is built by moving the same core
// register into both halves of the D register.
def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))),
          (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP     : Vector Duplicate Lane (from scalar to all elements)

// D-register result.  IdxTy is the assembler operand class for the lane
// index; the selection pattern accepts any immediate lane.
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

// Q-register result duplicated from a lane of a D register.
// The selection pattern uses imm:$lane, exactly like VDUPLND.  It previously
// hard-coded VectorIndex32:$lane for every element size, which limited the
// 8- and 16-bit instantiations to lanes that satisfy the 32-bit index
// predicate even though their operand (IdxTy) allows more.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                                 imm:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

// The fixed low bits of Inst{19-16} select the element size; the remaining
// high bits carry the lane index.
def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

let Predicates = [HasNEON] in {
// f16 elements are 16 bits wide, so a v4f16 lane splat must use the 16-bit
// VDUP.  The 32-bit form would replicate a pair of halves and only encodes
// a single lane bit (see VDUPLN32d above).
def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
          (VDUPLN16d DPR:$Vm, imm:$lane)>;

def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Q-register sources: duplicate from the D half that contains the lane.
def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Splat of an FP scalar: place it in ssub_0 of an IMPLICIT_DEF D register
// and duplicate lane 0.
def : Pat<(v4f16 (ARMvdup HPR:$src)),
          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             HPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v8f16 (ARMvdup HPR:$src)),
          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                             HPR:$src, ssub_0), (i32 0)))>;
}

// VMOVN    : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
// VQMOVN   : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL    : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

// anyext is selected to the unsigned lengthening move.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))),  (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
}

// Vector Conversions.

// VCVT     : Vector Convert Between Floating-Point and Integers
// 32-bit elements, D registers.
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
                    "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
                    "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
                    "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
                    "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>;

// 32-bit elements, Q registers.
def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
                    "vcvt", "s32.f32", v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
                    "vcvt", "u32.f32", v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
                    "vcvt", "f32.s32", v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
                    "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>;

// 16-bit elements, D registers; these need HasFullFP16.
def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0,
                    "vcvt", "s16.f16", v4i16, v4f16, fp_to_sint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0,
                    "vcvt", "u16.f16", v4i16, v4f16, fp_to_uint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0,
                    "vcvt", "f16.s16", v4f16, v4i16, sint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0,
                    "vcvt", "f16.u16", v4f16, v4i16, uint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;

// 16-bit elements, Q registers; these need HasFullFP16.
def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0,
                    "vcvt", "s16.f16", v8i16, v8f16, fp_to_sint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0,
                    "vcvt", "u16.f16", v8i16, v8f16, fp_to_uint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0,
                    "vcvt", "f16.s16", v8f16, v8i16, sint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0,
                    "vcvt", "f16.u16", v8f16, v8i16, uint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;

// VCVT{A, N, P, M}
// For one mnemonic suffix "op" this expands to the signed/unsigned, D/Q,
// f32/f16 variants.  IntS and IntU are the operators matched by the signed
// and unsigned forms respectively.
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", op),
                        "s32.f32", v2i32, v2f32, IntS>,
              Requires<[HasV8, HasNEON]>;
    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", op),
                        "s32.f32", v4i32, v4f32, IntS>,
              Requires<[HasV8, HasNEON]>;
    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", op),
                        "u32.f32", v2i32, v2f32, IntU>,
              Requires<[HasV8, HasNEON]>;
    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", op),
                        "u32.f32", v4i32, v4f32, IntU>,
              Requires<[HasV8, HasNEON]>;
    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", op),
                        "s16.f16", v4i16, v4f16, IntS>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", op),
                        "s16.f16", v8i16, v8f16, IntS>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", op),
                        "u16.f16", v4i16, v4f16, IntU>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", op),
                        "u16.f16", v8i16, v8f16, IntU>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

// Assembler aliases: the fixed-point syntax with "#0" is accepted and mapped
// onto the plain integer <-> floating-point conversions defined above.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT     : Vector Convert Between Half-Precision and Single-Precision.
// The half-precision side of both conversions is typed as v4i16.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                      IIC_VUNAQ, "vcvt", "f16.f32",
                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
              Requires<[HasNEON, HasFP16]>;
def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                      IIC_VUNAQ, "vcvt", "f32.f16",
                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
              Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64   : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8",  v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
}

def VREV64q8  : VREV64Q<0b00, "vrev64", "8",  v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;

// Floating-point vectors reuse the integer instruction of the same element
// width.
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
}

// VREV32   : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8",  v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8",  v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16   : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

//   Aligned extractions: really just dropping registers

// Extracting a D-sized subvector from a Q register is selected as a plain
// EXTRACT_SUBREG; LaneCVT turns the starting lane into the sub-register index.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
    Requires<[HasNEON]>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;

def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16

//   VEXT     : Vector Extract

// $index is expressed in elements.  Each instance places it in the upper bits
// of the immediate field Inst{11-8} and forces the remaining low bits to
// zero, so the legal index range shrinks as the element size grows.  immTy is
// the operand class that enforces that range.

// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// D-register form: Inst{11} is always zero, index occupies at most Inst{10-8}.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// Q-register form: index occupies at most Inst{11-8}.
// The index operand uses immTy (as VEXTd does) rather than a fixed imm0_15,
// so that VEXTq16/VEXTq32/VEXTq64 only accept indices that their narrower
// Inst{11-N} field can actually encode instead of silently dropping the
// high bits of an out-of-range index.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
// v4f16 shares the 16-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}     = index{0};
  let Inst{9-8}    = 0b00;
}
// v2f32 shares the 32-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
// v8f16 shares the 16-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}    = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8}    = 0b000;
}
// v4f32 shares the 32-bit integer instruction.
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

//   VTRN     : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.
//
// The 1-register forms carry their selection pattern directly.  The 2-, 3-
// and 4-register forms have empty pattern lists and are selected through the
// explicit Pats that follow the instruction definitions; the 3- and
// 4-register forms go through *Pseudo instructions taking a QQPR table.

//   VTBL     : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0,
        (outs DPR:$Vd), (ins VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0,
        (outs DPR:$Vd), (ins VecListDPair:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0,
        (outs DPR:$Vd), (ins VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0,
        (outs DPR:$Vd), (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

//   VTBX     : Vector Table Extension
// Same shapes as VTBL, plus an $orig input tied to the destination.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// Selection patterns for the multi-register table forms.  The table
// registers are glued together with REG_SEQUENCE; the 3-register forms pad
// the fourth QQPR slot (dsub_3) with IMPLICIT_DEF.
let Predicates = [HasNEON] in {
// Two table registers.
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

// Three table registers.
def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBX3Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;

// Four table registers.
def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
}

// VRINT      : Vector Rounding
// For each rounding-mode suffix `op` this emits D/Q x f32/f16 instructions
// (the mode is encoded in Inst{9-7}) plus assembler aliases that accept the
// redundant two-type spelling ".f32.f32" / ".f16.f16".
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f32",
                       v2f32, v2f32, Int>,
             Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f32",
                       v4f32, v4f32, Int>,
             Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f16",
                       v4f16, v4f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       "vrint" # op, "f16",
                       v8f16, v8f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<"vrint" # op # ".f32.f32\t$Dd, $Dm",
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<"vrint" # op # ".f32.f32\t$Qd, $Qm",
                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
    def : NEONInstAlias<"vrint" # op # ".f16.f16\t$Dd, $Dm",
                        (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
    def : NEONInstAlias<"vrint" # op # ".f16.f16\t$Qd, $Qm",
                        (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
// Helper classes for the AES and SHA instructions; all of them require
// HasV8 and HasCrypto and live in the "v8Crypto" decoder namespace.
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 "aes" # op, "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  "aes" # op, "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 "sha" # op, "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  "sha" # op, "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20,
                 SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 "sha" # op, "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD   : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE   : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC  : AES<"mc", 1, 0, int_arm_neon_aesmc>;

// SHA1H, SHA1C, SHA1M and SHA1P are defined with null_frag; their intrinsics
// take or return a scalar i32 and are matched by the explicit Pats below.
def SHA1H     : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1   : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C     : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M     : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P     : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0   : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H   : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2  : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

// The scalar i32 operand is copied into an SPR and widened to a vector
// register with SUBREG_TO_REG; for sha1h the scalar result is taken back out
// of ssub_0 and copied to a GPR.
let Predicates = [HasNEON] in {
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;
}

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// The pattern classes below run a scalar FP operation on the vector unit:
// each scalar operand is inserted into lane ssub_0 of an otherwise undefined
// D register (constrained to DPR_VFP2), the vector instruction is applied,
// and lane ssub_0 of the result is extracted.

// Unary f32 operation.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

// Binary f32 operation.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Binary f16 operation (HPR operands, v4f16 carrier).
class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
               (v4f16 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// f32 multiply combined with an accumulate: OpNode(acc, MulNode(a, b)).
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Integer (GPR) -> f32 conversion.
class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;

// f32 -> integer conversion.
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
let Predicates = [HasNEON, UseNEONForFP, UseFPVMLx] in {
  def : N3VSMulOpPat<fmul, fadd, VMLAfd>;
  def : N3VSMulOpPat<fmul, fsub, VMLSfd>;
}
let Predicates = [HasVFP4, UseNEONForFP, UseFusedMAC] in {
  def : N3VSMulOpPat<fmul, fadd, VFMAfd>;
  def : N3VSMulOpPat<fmul, fsub, VFMSfd>;
}
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
// The same GPR is passed as both halves and the low S lane is extracted.
let Predicates = [HasNEON, DontUseVMOVSR] in {
  def : Pat<(f32 (bitconvert GPR:$a)),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>;
  def : Pat<(arm_vmovsr GPR:$a),
            (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>;
}

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns or Endianness - Revert Patterns
//===----------------------------------------------------------------------===//

// bit_convert
//
// Three groups follow:
//  * conversions between types with the same element width are selected as
//    a plain retyping of the register, regardless of endianness;
//  * under IsLE every remaining conversion is likewise a plain retyping;
//  * under IsBE every remaining conversion is selected as a VREV: the
//    instruction is VREV<wider element width><d|q><narrower element width>.

// 64 bit conversions
let Predicates = [HasNEON] in {
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;

def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;

// 128 bit conversions
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;

def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
}

// Little endian: every cross-width bitconvert is a no-op retyping.
let Predicates = [IsLE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;

  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;

  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;

  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;

  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;

  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;

  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;

  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
}

// Big endian: every cross-width bitconvert is selected as a VREV.
let Predicates = [IsBE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (VREV16d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;

  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;

  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;

  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
}

// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
// Byte-aligned accesses use the 8-bit load/store plus VREV64q8;
// halfword-aligned accesses use the 16-bit load/store plus VREV64q16.
let Predicates = [IsBE,HasNEON] in {
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VREV64q8 (VLD1q8 addrmode6:$addr))>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VREV64q16 (VLD1q16 addrmode6:$addr))>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
}

// Fold extracting an element out of a v2i32 into a vfp register.
// Select a plain EXTRACT_SUBREG of the D register when one i32 lane of a v2i32
// is reinterpreted as f32. SSubReg_f32_reg is applied to the lane immediate;
// presumably it produces the matching S sub-register index -- confirm at its
// definition, which is outside this part of the file.
def : Pat<
        (f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
        (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
      Requires<[HasNEON]>;

// Vector lengthening move with load, matching extending loads.

// Lengthen_Single: patterns for a VLD1d<SrcTy> load whose result is widened
// by one VMOVL. Three patterns are produced per instantiation:
//   _Any  matches extloadvi<SrcTy>,  selects VMOVLu
//   _Z    matches zextloadvi<SrcTy>, selects VMOVLu
//   _S    matches sextloadvi<SrcTy>, selects VMOVLs
// Example (the _Any pattern):
//   Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
    def _Any
      : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
              (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
            (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
              (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
        Requires<[HasNEON]>;

    def _Z
      : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
              (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
            (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
              (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
        Requires<[HasNEON]>;

    def _S
      : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
              (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
            (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
              (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
        Requires<[HasNEON]>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example (the _Any pattern; template arguments are DestLanes, DestTy, SrcTy,
// InsnLanes, InsnTy):
//   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                          (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
// The data is loaded with VLD1LNd32 into lane 0 of an undefined D register,
// widened with VMOVL<u|s>v<InsnLanes><InsnTy>, and dsub_0 of the result is
// extracted.
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
              (VLD1LNd32 addrmode6oneL32:$addr,
                         (f64 (IMPLICIT_DEF)), (i32 0))),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _Z
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
              (VLD1LNd32 addrmode6oneL32:$addr,
                         (f64 (IMPLICIT_DEF)), (i32 0))),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _S
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
              (VLD1LNd32 addrmode6oneL32:$addr,
                         (f64 (IMPLICIT_DEF)), (i32 0))),
            dsub_0)>,
      Requires<[HasNEON]>;
}

// The following multiclass definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// Same three patterns as Lengthen_HalfSingle, except that the VLD1LNd32
// result is passed through VREV32d<RevLanes> before it is widened.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string InsnLanes,
                                          string InsnTy, string RevLanes> {
  def _Any
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
              (!cast<Instruction>("VREV32d" # RevLanes)
                (VLD1LNd32 addrmode6oneL32:$addr,
                           (f64 (IMPLICIT_DEF)), (i32 0)))),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _Z
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
              (!cast<Instruction>("VREV32d" # RevLanes)
                (VLD1LNd32 addrmode6oneL32:$addr,
                           (f64 (IMPLICIT_DEF)), (i32 0)))),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _S
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
              (!cast<Instruction>("VREV32d" # RevLanes)
                (VLD1LNd32 addrmode6oneL32:$addr,
                           (f64 (IMPLICIT_DEF)), (i32 0)))),
            dsub_0)>,
      Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening step, to quadruple the initial length.
//
// Example (the _Any pattern). The full Q-register result of the second VMOVL
// is used, so only the first VMOVL is followed by an EXTRACT_SUBREG:
//   Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                            (f64 (IMPLICIT_DEF)),
//                                            (i32 0))),
//                           dsub_0))>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty,
                           string Insn2Lanes, string Insn2Ty> {
  def _Any
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG
              (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                (VLD1LNd32 addrmode6oneL32:$addr,
                           (f64 (IMPLICIT_DEF)), (i32 0))),
              dsub_0))>,
      Requires<[HasNEON]>;

  def _Z
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG
              (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                (VLD1LNd32 addrmode6oneL32:$addr,
                           (f64 (IMPLICIT_DEF)), (i32 0))),
              dsub_0))>,
      Requires<[HasNEON]>;

  def _S
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG
              (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                (VLD1LNd32 addrmode6oneL32:$addr,
                           (f64 (IMPLICIT_DEF)), (i32 0))),
              dsub_0))>,
      Requires<[HasNEON]>;
}

// The following multiclass definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// Same three patterns as Lengthen_Double, except that the VLD1LNd32 result is
// passed through VREV32d<RevLanes> before the first VMOVL.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                      string SrcTy, string Insn1Lanes,
                                      string Insn1Ty, string Insn2Lanes,
                                      string Insn2Ty, string RevLanes> {
  def _Any
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG
              (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                (!cast<Instruction>("VREV32d" # RevLanes)
                  (VLD1LNd32 addrmode6oneL32:$addr,
                             (f64 (IMPLICIT_DEF)), (i32 0)))),
              dsub_0))>,
      Requires<[HasNEON]>;

  def _Z
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG
              (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                (!cast<Instruction>("VREV32d" # RevLanes)
                  (VLD1LNd32 addrmode6oneL32:$addr,
                             (f64 (IMPLICIT_DEF)), (i32 0)))),
              dsub_0))>,
      Requires<[HasNEON]>;

  def _S
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG
              (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                (!cast<Instruction>("VREV32d" # RevLanes)
                  (VLD1LNd32 addrmode6oneL32:$addr,
                             (f64 (IMPLICIT_DEF)), (i32 0)))),
              dsub_0))>,
      Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening step, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Example (the _Any pattern). Both VMOVL results are narrowed to dsub_0:
//   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                            (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                               string Insn1Lanes, string Insn1Ty,
                               string Insn2Lanes, string Insn2Ty> {
  def _Any
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
              (EXTRACT_SUBREG
                (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                  (VLD1LNd16 addrmode6:$addr,
                             (f64 (IMPLICIT_DEF)), (i32 0))),
                dsub_0)),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _Z
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
              (EXTRACT_SUBREG
                (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                  (VLD1LNd16 addrmode6:$addr,
                             (f64 (IMPLICIT_DEF)), (i32 0))),
                dsub_0)),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _S
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
              (EXTRACT_SUBREG
                (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                  (VLD1LNd16 addrmode6:$addr,
                             (f64 (IMPLICIT_DEF)), (i32 0))),
                dsub_0)),
            dsub_0)>,
      Requires<[HasNEON]>;
}

// The following multiclass definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
// Same three patterns as Lengthen_HalfDouble, except that the VLD1LNd16
// result is passed through VREV16d8 before the first VMOVL.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string Insn1Lanes,
                                          string Insn1Ty, string Insn2Lanes,
                                          string Insn2Ty> {
  def _Any
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
              (EXTRACT_SUBREG
                (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                  (VREV16d8
                    (VLD1LNd16 addrmode6:$addr,
                               (f64 (IMPLICIT_DEF)), (i32 0)))),
                dsub_0)),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _Z
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
              (EXTRACT_SUBREG
                (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                  (VREV16d8
                    (VLD1LNd16 addrmode6:$addr,
                               (f64 (IMPLICIT_DEF)), (i32 0)))),
                dsub_0)),
            dsub_0)>,
      Requires<[HasNEON]>;

  def _S
    : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
            (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG
            (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
              (EXTRACT_SUBREG
                (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                  (VREV16d8
                    (VLD1LNd16 addrmode6:$addr,
                               (f64 (IMPLICIT_DEF)), (i32 0)))),
                dsub_0)),
            dsub_0)>,
      Requires<[HasNEON]>;
}

// Single lengthening of a full D register; not restricted by endianness.
defm : Lengthen_Single<"8", "i16", "8">;  // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

// Little-endian instantiations of the partial-register lengthening loads.
let Predicates = [HasNEON,IsLE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

// Big-endian instantiations; the trailing argument of the HalfSingle/Double
// variants is RevLanes, i.e. the VREV32d<RevLanes> applied after the load.
let Predicates = [HasNEON,IsBE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32",
                                    "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4",
                                        "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64",
                                    "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d8 instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
// Big-endian triple lengthening (v2i8 -> v2i16 -> v2i32 -> v2i64): the
// VLD1LNd16 result goes through VREV16d8 before the first VMOVL.
let Predicates = [HasNEON,IsBE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
}

// concat_vectors of two D registers: build the Q register with REG_SEQUENCE,
// placing $Dn in dsub_0 and $Dm in dsub_1.
let Predicates = [HasNEON] in {
  def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
            (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
            (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
            (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
            (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
            (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
  def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
            (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// fmdhr/fmdlr are accepted as aliases of VSETLNi32 on lane 1 and lane 0 of
// $Dd respectively.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
      (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
      (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Three-operand forms, D-register and Q-register variant of each mnemonic.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases: $Vdn is used as both destination and first source.
// NOTE(review): this list has vand/veor/vorr but no vbic entry -- confirm
// whether that omission is intentional.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ...
// immediates: "vand" with an immediate is matched onto the VBIC-immediate
// instructions, using the nImmSplatNotI16/nImmSplatNotI32 operand classes.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
      (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
      (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
      (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
      (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. The lane index needs special handling
// that an InstAlias can't provide, so these pseudo-instructions are used
// instead.
// Forms without writeback ("$list, $addr").
def VLD1LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
    (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD1LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VLD1LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;

// Writeback forms: fixed ("$addr!") and register ("$addr, $Rm").
def VLD1LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
    (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
    (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. The lane index needs special handling
// that an InstAlias can't provide, so these pseudo-instructions are used
// instead.
// Forms without writeback ("$list, $addr").
def VST1LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
    (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST1LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST1LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;

// Writeback forms: fixed ("$addr!") and register ("$addr, $Rm").
def VST1LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
    (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST1LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
    (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
         rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. The lane index needs special handling
// that an InstAlias can't provide, so these pseudo-instructions are used
// instead. The "d" and "q" names differ in the vector-list operand class
// (VecListTwoD*Indexed vs. VecListTwoQ*Indexed).
// Forms without writeback ("$list, $addr").
def VLD2LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VLD2LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD2LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD2LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback forms: fixed ("$addr!") and register ("$addr, $Rm").
def VLD2LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VST2 single-lane forms without writeback ("$list, $addr"). The "d" and "q"
// names differ in the vector-list operand class (VecListTwoD*Indexed vs.
// VecListTwoQ*Indexed).
def VST2LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback forms: fixed ("$addr!") and register ("$addr, $Rm").
def VST2LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): the operands here are all-lanes lists (VecListThree*AllLanes),
// not indexed ones, so "lane index" above looks copied from the single-lane
// groups -- confirm the intended wording.
// Forms without writeback ("$list, $addr").
def VLD3DUPdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;

// Writeback forms: fixed ("$addr!") and register ("$addr, $Rm").
def VLD3DUPdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. The lane index needs special handling
// that an InstAlias can't provide, so these pseudo-instructions are used
// instead.
// Forms without writeback ("$list, $addr").
def VLD3LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;

// Writeback forms: fixed ("$addr!") and register ("$addr, $Rm").
def VLD3LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every record in this group uses addrmode6align64 for $addr; only the list
// operand (VecListThreeD vs. VecListThreeQ), the assembly operand string and
// the data-type suffix vary, so each family is generated from the suffix list.

// "$list, $addr" forms.
foreach sz = ["8", "16", "32"] in
def VLD3dAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld3${p}", "." # sz, "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD3qAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld3${p}", "." # sz, "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
foreach sz = ["8", "16", "32"] in
def VLD3dWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld3${p}", "." # sz, "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD3qWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld3${p}", "." # sz, "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
foreach sz = ["8", "16", "32"] in
def VLD3dWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld3${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD3qWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld3${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// All records in this group use addrmode6alignNone for $addr. The list
// operand class follows the data-type suffix: *ByteIndexed for .8,
// *HWordIndexed for .16 and *WordIndexed for .32. Only .16 and .32 records
// are defined for the VecListThreeQ* list operands.

// "$list, $addr" forms.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every record in this group uses addrmode6align64 for $addr; only the list
// operand (VecListThreeD vs. VecListThreeQ), the assembly operand string and
// the data-type suffix vary, so each family is generated from the suffix list.

// "$list, $addr" forms.
foreach sz = ["8", "16", "32"] in
def VST3dAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst3${p}", "." # sz, "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST3qAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst3${p}", "." # sz, "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
foreach sz = ["8", "16", "32"] in
def VST3dWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst3${p}", "." # sz, "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST3qWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst3${p}", "." # sz, "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
foreach sz = ["8", "16", "32"] in
def VST3dWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst3${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST3qWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst3${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// The list operand is VecListFourDAllLanes or VecListFourQAllLanes. The $addr
// operand class follows the data-type suffix: addrmode6dupalign32 for .8,
// addrmode6dupalign64 for .16 and addrmode6dupalign64or128 for .32.

// "$list, $addr" forms.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDAllLanes:$list,
                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQAllLanes:$list,
                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Both operand classes follow the data-type suffix:
//   .8  -> *ByteIndexed list,  addrmode6align32
//   .16 -> *HWordIndexed list, addrmode6align64
//   .32 -> *WordIndexed list,  addrmode6align64or128
// Only .16 and .32 records are defined for the VecListFourQ* list operands.

// "$list, $addr" forms.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every record in this group uses addrmode6align64or128or256 for $addr; only
// the list operand (VecListFourD vs. VecListFourQ), the assembly operand
// string and the data-type suffix vary, so each family is generated from the
// suffix list.

// "$list, $addr" forms.
foreach sz = ["8", "16", "32"] in
def VLD4dAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD4qAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
foreach sz = ["8", "16", "32"] in
def VLD4dWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD4qWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;

// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
foreach sz = ["8", "16", "32"] in
def VLD4dWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VLD4qWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vld4${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Both operand classes follow the data-type suffix:
//   .8  -> *ByteIndexed list,  addrmode6align32
//   .16 -> *HWordIndexed list, addrmode6align64
//   .32 -> *WordIndexed list,  addrmode6align64or128
// Only .16 and .32 records are defined for the VecListFourQ* list operands.

// "$list, $addr" forms.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every record in this group uses addrmode6align64or128or256 for $addr; only
// the list operand (VecListFourD vs. VecListFourQ), the assembly operand
// string and the data-type suffix vary, so each family is generated from the
// suffix list.

// "$list, $addr" forms.
foreach sz = ["8", "16", "32"] in
def VST4dAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST4qAsm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;

// "$list, $addr!" (WB_fixed) forms.
foreach sz = ["8", "16", "32"] in
def VST4dWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST4qWB_fixed_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;

// "$list, $addr, $Rm" (WB_register) forms; these take an extra rGPR:$Rm.
foreach sz = ["8", "16", "32"] in
def VST4dWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;
foreach sz = ["8", "16", "32"] in
def VST4qWB_register_Asm_ # sz :
    NEONDataTypeAsmPseudoInst<"vst4${p}", "." # sz, "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix.
// The register-to-register "vmov" forms are aliases of VORR with $Vm supplied
// as both source operands.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// Each "vcle Xd, Xn, Xm" alias in this group resolves to the corresponding
// VCGE* instruction with the two source operands exchanged ($Xm, then $Xn).
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 alias is defined with Predicates = [HasNEON, HasFullFP16].
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 alias is defined with Predicates = [HasNEON, HasFullFP16].
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// Each "vclt Xd, Xn, Xm" alias in this group resolves to the corresponding
// VCGT* instruction with the two source operands exchanged ($Xm, then $Xn).
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 alias is defined with Predicates = [HasNEON, HasFullFP16].
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 alias is defined with Predicates = [HasNEON, HasFullFP16].
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// D-register forms first, then Q-register forms.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov.i32 Vd, #imm" can be handled via "vmvn", and "vmvn.i32 Vd, #imm" via
// "vmov", when the immediate matches the nImmVMOVI32Neg operand class.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
// Each entry maps a 'q'-suffixed mnemonic onto the same mnemonic without the
// suffix.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

// For vrecpe only the explicitly typed .f32 and .u32 spellings are aliased.
def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
// Both aliases accept the immediate through the nImmVMOVI32 operand class and
// resolve to the i32 VMOV records (Q-register form, then D-register form).
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;