Lines in ARMInstrNEON.td matching the full-text search terms "two" and "-lane"

1 //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific Operands.
16 //===----------------------------------------------------------------------===//
131 // Register list of two sequential D registers.
158 // Register list of two D registers spaced by 2 (two sequential Q registers).
195 // Register list of two D registers, with "all lanes" subscripting.
205 // Register list of two D registers spaced by 2 (two sequential Q registers).
256 // Register list of one D register, with byte lane subscripting.
266 // ...with half-word lane subscripting.
276 // ...with word lane subscripting.
287 // Register list of two D registers with byte lane subscripting.
297 // ...with half-word lane subscripting.
307 // ...with word lane subscripting.
317 // Register list of two Q registers with half-word lane subscripting.
327 // ...with word lane subscripting.
339 // Register list of three D registers with byte lane subscripting.
349 // ...with half-word lane subscripting.
359 // ...with word lane subscripting.
369 // Register list of three Q registers with half-word lane subscripting.
379 // ...with word lane subscripting.
390 // Register list of four D registers with byte lane subscripting.
400 // ...with half-word lane subscripting.
410 // ...with word lane subscripting.
420 // Register list of four Q registers with half-word lane subscripting.
430 // ...with word lane subscripting.
442 return cast<LoadSDNode>(N)->getAlign() >= 8;
446 return cast<StoreSDNode>(N)->getAlign() >= 8;
449 return cast<LoadSDNode>(N)->getAlign() == 4;
453 return cast<StoreSDNode>(N)->getAlign() == 4;
456 return cast<LoadSDNode>(N)->getAlign() == 2;
460 return cast<StoreSDNode>(N)->getAlign() == 2;
463 return cast<LoadSDNode>(N)->getAlign() == 1;
467 return cast<StoreSDNode>(N)->getAlign() == 1;
470 return cast<LoadSDNode>(N)->getAlign() < 4;
474 return cast<StoreSDNode>(N)->getAlign() < 4;
477 //===----------------------------------------------------------------------===//
478 // NEON-specific DAG Nodes.
479 //===----------------------------------------------------------------------===//
537 //===----------------------------------------------------------------------===//
539 //===----------------------------------------------------------------------===//
555 // Classes for VLD* pseudo-instructions with multi-register operands.
612 let Inst{5-4} = Rn{5-4};
650 let Inst{5-4} = Rn{5-4};
657 let Inst{5-4} = Rn{5-4};
740 let Inst{5-4} = Rn{5-4};
749 let Inst{5-4} = Rn{5-4};
756 let Inst{5-4} = Rn{5-4};
797 // VLD2 : Vector Load (multiple 2-element structures)
804 let Inst{5-4} = Rn{5-4};
834 let Inst{5-4} = Rn{5-4};
841 let Inst{5-4} = Rn{5-4};
867 // ...with double-spaced registers
881 // VLD3 : Vector Load (multiple 3-element structures)
918 // ...with double-spaced registers:
939 // VLD4 : Vector Load (multiple 4-element structures)
947 let Inst{5-4} = Rn{5-4};
966 let Inst{5-4} = Rn{5-4};
978 // ...with double-spaced registers:
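For orientation, the VLD2/VLD3/VLD4 entries above are the "multiple N-element structures" loads: they read interleaved data and split it across two, three, or four registers. A minimal C sketch of the same operation through the ACLE intrinsics (assumes an ARM target with NEON and arm_neon.h; the function and buffer names are illustrative):

#include <arm_neon.h>

/* VLD2.8: load 16 interleaved bytes and de-interleave them, so that
   r.val[0] holds the even-indexed elements and r.val[1] the odd ones. */
uint8x8x2_t deinterleave_pairs(const uint8_t *buf) {
    return vld2_u8(buf);
}

/* VLD3.8: the 3-element analogue, e.g. splitting packed RGB bytes. */
uint8x8x3_t deinterleave_rgb(const uint8_t *buf) {
    return vld3_u8(buf);
}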
1001 // Classes for VLD*LN pseudo-instructions with multi-register operands.
1005 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1010 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1013 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1018 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1021 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1026 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1028 // VLD1LN : Vector Load (single element to one lane)
1032 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
1033 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1037 imm:$lane))]> {
1044 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
1045 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1049 imm:$lane))]>, Sched<[WriteVLD1]> {
1057 imm:$lane))];
1061 let Inst{7-5} = lane{2-0};
1064 let Inst{7-6} = lane{1-0};
1065 let Inst{5-4} = Rn{5-4};
1068 let Inst{7} = lane{0};
1069 let Inst{5-4} = Rn{5-4};
1078 (f16 (load addrmode6:$addr)), imm:$lane),
1079 (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
1081 (f16 (load addrmode6:$addr)), imm:$lane),
1082 (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1084 (bf16 (load addrmode6:$addr)), imm:$lane),
1085 (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
1087 (bf16 (load addrmode6:$addr)), imm:$lane),
1088 (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1090 (f32 (load addrmode6:$addr)), imm:$lane),
1091 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1093 (f32 (load addrmode6:$addr)), imm:$lane),
1094 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1096 // A 64-bit subvector insert to the first 128-bit vector position
1119 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1120 "\\{$Vd[$lane]\\}, $Rn$Rm",
1126 let Inst{7-5} = lane{2-0};
1129 let Inst{7-6} = lane{1-0};
1133 let Inst{7} = lane{0};
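The VLD1LN definitions and patterns above implement the "single element to one lane" load, including the Q-register pseudo forms (expanded to real D-register VLD1LN instructions later) and the $Rn$Rm writeback variants. A hedged C illustration using the usual intrinsics (lane indices must be compile-time constants; names are illustrative):

#include <arm_neon.h>

/* VLD1.32 {d0[1]}, [r0]: load one 32-bit element into lane 1,
   leaving the other lane of 'v' untouched. */
uint32x2_t load_lane1(const uint32_t *p, uint32x2_t v) {
    return vld1_lane_u32(p, v, 1);
}

/* Q-register destination, lane 3 of a float32x4_t. */
float32x4_t load_lane3(const float *p, float32x4_t v) {
    return vld1q_lane_f32(p, v, 3);
}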
1142 // VLD2LN : Vector Load (single 2-element structure to one lane)
1145 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1146 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1154 let Inst{7-5} = lane{2-0};
1157 let Inst{7-6} = lane{1-0};
1160 let Inst{7} = lane{0};
1167 // ...with double-spaced registers:
1169 let Inst{7-6} = lane{1-0};
1172 let Inst{7} = lane{0};
1182 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1183 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1190 let Inst{7-5} = lane{2-0};
1193 let Inst{7-6} = lane{1-0};
1196 let Inst{7} = lane{0};
1204 let Inst{7-6} = lane{1-0};
1207 let Inst{7} = lane{0};
1213 // VLD3LN : Vector Load (single 3-element structure to one lane)
1217 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1218 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1225 let Inst{7-5} = lane{2-0};
1228 let Inst{7-6} = lane{1-0};
1231 let Inst{7} = lane{0};
1238 // ...with double-spaced registers:
1240 let Inst{7-6} = lane{1-0};
1243 let Inst{7} = lane{0};
1254 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1256 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1263 let Inst{7-5} = lane{2-0};
1266 let Inst{7-6} = lane{1-0};
1269 let Inst{7} = lane{0};
1277 let Inst{7-6} = lane{1-0};
1280 let Inst{7} = lane{0};
1286 // VLD4LN : Vector Load (single 4-element structure to one lane)
1291 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1292 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1301 let Inst{7-5} = lane{2-0};
1304 let Inst{7-6} = lane{1-0};
1307 let Inst{7} = lane{0};
1315 // ...with double-spaced registers:
1317 let Inst{7-6} = lane{1-0};
1320 let Inst{7} = lane{0};
1332 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1334 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1342 let Inst{7-5} = lane{2-0};
1345 let Inst{7-6} = lane{1-0};
1348 let Inst{7} = lane{0};
1357 let Inst{7-6} = lane{1-0};
1360 let Inst{7} = lane{0};
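VLD2LN, VLD3LN, and VLD4LN extend the same idea to one lane of a whole structure: a single 2-, 3-, or 4-element group is loaded from memory and scattered into the same lane of each destination register, which is what the vldN_lane_* intrinsics express. Sketch (illustrative names; lane index must be a constant):

#include <arm_neon.h>

/* VLD2.16 {d0[2], d1[2]}, [r0]: load one u16 pair into lane 2 of both
   registers of the structure, preserving the remaining lanes. */
uint16x4x2_t load_pair_lane2(const uint16_t *p, uint16x4x2_t v) {
    return vld2_lane_u16(p, v, 2);
}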
1467 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
1486 // ...with double-spaced registers
1558 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
1577 // ...with double-spaced registers (not used for codegen):
1615 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
1634 // ...with double-spaced registers (not used for codegen):
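The VLD2DUP/VLD3DUP/VLD4DUP entries are the "to all lanes" variants: each element of the loaded structure is broadcast across every lane of its destination register, matching the vldN_dup_* intrinsics. Sketch (names illustrative):

#include <arm_neon.h>

/* VLD2.8 {d0[], d1[]}, [r0]: load one byte pair, replicate byte 0 into
   all lanes of r.val[0] and byte 1 into all lanes of r.val[1]. */
uint8x8x2_t splat_pair(const uint8_t *p) {
    return vld2_dup_u8(p);
}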
1677 // Classes for VST* pseudo-instructions with multi-register operands.
1727 let Inst{5-4} = Rn{5-4};
1766 let Inst{5-4} = Rn{5-4};
1774 let Inst{5-4} = Rn{5-4};
1804 let Inst{5-4} = Rn{5-4};
1812 let Inst{5-4} = Rn{5-4};
1862 let Inst{5-4} = Rn{5-4};
1871 let Inst{5-4} = Rn{5-4};
1879 let Inst{5-4} = Rn{5-4};
1922 // VST2 : Vector Store (multiple 2-element structures)
1928 let Inst{5-4} = Rn{5-4};
1958 let Inst{5-4} = Rn{5-4};
1965 let Inst{5-4} = Rn{5-4};
1975 let Inst{5-4} = Rn{5-4};
1983 let Inst{5-4} = Rn{5-4};
2006 // ...with double-spaced registers
2020 // VST3 : Vector Store (multiple 3-element structures)
2057 // ...with double-spaced registers:
2078 // VST4 : Vector Store (multiple 4-element structures)
2085 let Inst{5-4} = Rn{5-4};
2104 let Inst{5-4} = Rn{5-4};
2116 // ...with double-spaced registers:
2139 // Classes for VST*LN pseudo-instructions with multi-register operands.
2142 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
2147 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2149 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2154 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2156 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2161 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2163 // VST1LN : Vector Store (single element from one lane)
2167 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2168 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2169 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2176 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2182 let Inst{7-5} = lane{2-0};
2186 let Inst{7-6} = lane{1-0};
2192 let Inst{7} = lane{0};
2193 let Inst{5-4} = Rn{5-4};
2201 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2202 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2203 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2204 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2206 def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
2207 (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
2208 def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
2209 (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2217 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2218 "\\{$Vd[$lane]\\}, $Rn$Rm",
2220 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2227 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2233 let Inst{7-5} = lane{2-0};
2237 let Inst{7-6} = lane{1-0};
2242 let Inst{7} = lane{0};
2243 let Inst{5-4} = Rn{5-4};
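VST1LN is the store counterpart of VLD1LN: one lane of a D (or, via the pseudo forms, Q) register is written to memory, optionally with post-increment writeback of the base register. A small C sketch (the writeback note below is an assumption about how compilers usually form those encodings, not something stated in this file):

#include <arm_neon.h>

/* VST1.32 {d0[1]}, [r0]: store lane 1 of a 2 x u32 vector. */
void store_lane1(uint32_t *p, uint32x2_t v) {
    vst1_lane_u32(p, v, 1);
}

/* There is no dedicated writeback intrinsic; the post-increment forms are
   typically produced from a plain lane store plus a pointer update. */
void store_lane1_post(uint32_t **p, uint32x2_t v) {
    vst1_lane_u32(*p, v, 1);
    *p += 1;
}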
2252 // VST2LN : Vector Store (single 2-element structure from one lane)
2255 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2256 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2264 let Inst{7-5} = lane{2-0};
2267 let Inst{7-6} = lane{1-0};
2270 let Inst{7} = lane{0};
2277 // ...with double-spaced registers:
2279 let Inst{7-6} = lane{1-0};
2283 let Inst{7} = lane{0};
2294 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2295 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2302 let Inst{7-5} = lane{2-0};
2305 let Inst{7-6} = lane{1-0};
2308 let Inst{7} = lane{0};
2316 let Inst{7-6} = lane{1-0};
2319 let Inst{7} = lane{0};
2325 // VST3LN : Vector Store (single 3-element structure from one lane)
2329 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2330 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2337 let Inst{7-5} = lane{2-0};
2340 let Inst{7-6} = lane{1-0};
2343 let Inst{7} = lane{0};
2350 // ...with double-spaced registers:
2352 let Inst{7-6} = lane{1-0};
2355 let Inst{7} = lane{0};
2365 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2367 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2373 let Inst{7-5} = lane{2-0};
2376 let Inst{7-6} = lane{1-0};
2379 let Inst{7} = lane{0};
2387 let Inst{7-6} = lane{1-0};
2390 let Inst{7} = lane{0};
2396 // VST4LN : Vector Store (single 4-element structure from one lane)
2400 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2401 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2409 let Inst{7-5} = lane{2-0};
2412 let Inst{7-6} = lane{1-0};
2415 let Inst{7} = lane{0};
2423 // ...with double-spaced registers:
2425 let Inst{7-6} = lane{1-0};
2428 let Inst{7} = lane{0};
2439 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2441 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2448 let Inst{7-5} = lane{2-0};
2451 let Inst{7-6} = lane{1-0};
2454 let Inst{7} = lane{0};
2463 let Inst{7-6} = lane{1-0};
2466 let Inst{7} = lane{0};
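VST2LN/VST3LN/VST4LN store one lane from each of two to four registers as a single interleaved structure, i.e. the vstN_lane_* intrinsics. Sketch (illustrative):

#include <arm_neon.h>

/* VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]: write lane 0 of each of the
   four registers as four consecutive u32 values. */
void store_struct_lane0(uint32_t *p, uint32x2x4_t v) {
    vst4_lane_u32(p, v, 0);
}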
2516 //===----------------------------------------------------------------------===//
2518 //===----------------------------------------------------------------------===//
2520 // Basic 2-register operations: double- and quad-register.
2534 // Basic 2-register intrinsics, both double- and quad-register.
2584 // Narrow 2-register operations.
2593 // Narrow 2-register intrinsics.
2602 // Long 2-register operations (currently only used for VMOVL).
2611 // Long 2-register intrinsics.
2620 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
2632 // Basic 3-register operations: double- and quad-register.
2640 // All of these have a two-operand InstAlias.
2653 // All of these have a two-operand InstAlias.
2662 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2663 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2666 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2667 // All of these have a two-operand InstAlias.
2674 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2675 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2678 (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2679 // All of these have a two-operand InstAlias.
2691 // All of these have a two-operand InstAlias.
2702 // All of these have a two-operand InstAlias.
2710 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2711 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2715 imm:$lane)))))]> {
2716 // All of these have a two-operand InstAlias.
2723 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2724 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2728 imm:$lane)))))]> {
2729 // All of these have a two-operand InstAlias.
2734 // Basic 3-register intrinsics, both double- and quad-register.
2742 // All of these have a two-operand InstAlias.
2761 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2762 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2766 imm:$lane)))))]> {
2773 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2774 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2777 (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2798 // All of these have a two-operand InstAlias.
2831 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2832 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2836 imm:$lane)))))]> {
2843 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2844 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2848 imm:$lane)))))]> {
2862 // Multiply-Add/Sub operations: double- and quad-register.
2877 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2879 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2884 imm:$lane)))))))]>;
2890 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2892 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2897 imm:$lane)))))))]>;
2912 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2914 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2919 imm:$lane)))))))]>;
2926 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2928 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2933 imm:$lane)))))))]>;
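The multiply-accumulate by-scalar classes above take one operand as a single lane of Vm (the ARMvduplane in the pattern) and tie the accumulator to the destination ("$src1 = $Vd"). At the intrinsic level this is the vmla_lane_* family; a hedged sketch:

#include <arm_neon.h>

/* VMLA.I16 d0, d1, d2[3]: a + b * v[3], the scalar broadcast to all lanes. */
int16x4_t mla_by_lane(int16x4_t a, int16x4_t b, int16x4_t v) {
    return vmla_lane_s16(a, b, v, 3);
}

/* Widening form (VMLAL.S16 q0, d1, d2[3]), matching the long
   multiply-accumulate classes that follow. */
int32x4_t mlal_by_lane(int32x4_t acc, int16x4_t b, int16x4_t v) {
    return vmlal_lane_s16(acc, b, v, 3);
}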
2935 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
2953 // Neon 3-argument intrinsics, both double- and quad-register.
2972 // Long Multiply-Add/Sub operations.
2986 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2988 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2993 imm:$lane))))))]>;
2998 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3000 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3005 imm:$lane))))))]>;
3007 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
3019 // Neon Long 3-argument intrinsic. The destination register is
3020 // a quad-register and is also used as the first source operand register.
3034 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3036 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3041 imm:$lane)))))]>;
3047 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3049 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3054 imm:$lane)))))]>;
3056 // Narrowing 3-register intrinsics.
3067 // Long 3-register operations.
3082 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3083 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3086 (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
3091 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3092 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3095 (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3097 // Long 3-register operations with explicitly extended operands.
3110 // Long 3-register intrinsics with explicit extend (VABDL).
3123 // Long 3-register intrinsics.
3150 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3151 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3155 imm:$lane)))))]>;
3160 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3161 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3165 imm:$lane)))))]>;
3167 // Wide 3-register operations.
3176 // All of these have a two-operand InstAlias.
3181 // Pairwise long 2-register intrinsics, both double- and quad-register.
3197 // Pairwise long 2-register accumulate intrinsics,
3198 // both double- and quad-register.
3218 // both double- and quad-register.
3258 // both double- and quad-register.
3279 // both double- and quad-register.
3298 // both double- and quad-register.
3314 //===----------------------------------------------------------------------===//
3316 //===----------------------------------------------------------------------===//
3324 // Neon 2-register vector operations and intrinsics.
3326 // Neon 2-register comparisons.
3331 // 64-bit vector types.
3358 // 128-bit vector types.
3386 // Neon 3-register comparisons.
3394 // All of these have a two-operand InstAlias.
3406 // All of these have a two-operand InstAlias.
3416 // 64-bit vector types.
3427 // 128-bit vector types.
3440 // Neon 2-register vector intrinsics,
3446 // 64-bit vector types.
3454 // 128-bit vector types.
3464 // Neon Narrowing 2-register vector operations,
3481 // Neon Narrowing 2-register vector intrinsics,
3499 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
3512 // Neon 3-register vector operations.
3520 // 64-bit vector types.
3531 // 128-bit vector types.
3567 // Neon 3-register vector intrinsics.
3575 // 64-bit vector types.
3583 // 128-bit vector types.
3596 // 64-bit vector types.
3604 // 128-bit vector types.
3688 // Neon Narrowing 3-register vector intrinsics,
3705 // Neon Long 3-register vector operations.
3746 // Neon Long 3-register vector intrinsics.
3798 // Neon Wide 3-register vector intrinsics,
3815 // Neon Multiply-Op vector operations,
3821 // 64-bit vector types.
3829 // 128-bit vector types.
3854 // Neon Intrinsic-Op vector operations,
3860 // 64-bit vector types.
3868 // 128-bit vector types.
3877 // Neon 3-argument intrinsics,
3883 // 64-bit vector types.
3889 // 128-bit vector types.
3903 // 64-bit vector types.
3906 // 128-bit vector types.
3911 // Neon Long Multiply-Op vector operations,
3934 // Neon Long 3-argument intrinsics.
3979 // Neon Pairwise long 2-register intrinsics,
3984 // 64-bit vector types.
3992 // 128-bit vector types.
4002 // Neon Pairwise long 2-register accumulate intrinsics,
4007 // 64-bit vector types.
4015 // 128-bit vector types.
4025 // Neon 2-register vector shift by immediate,
4031 // 64-bit vector types.
4034 let Inst{21-19} = 0b001; // imm6 = 001xxx
4038 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4048 // 128-bit vector types.
4051 let Inst{21-19} = 0b001; // imm6 = 001xxx
4055 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4068 // 64-bit vector types.
4071 let Inst{21-19} = 0b001; // imm6 = 001xxx
4075 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4085 // 128-bit vector types.
4088 let Inst{21-19} = 0b001; // imm6 = 001xxx
4092 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4103 // Neon Shift-Accumulate vector operations,
4107 // 64-bit vector types.
4110 let Inst{21-19} = 0b001; // imm6 = 001xxx
4114 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4124 // 128-bit vector types.
4127 let Inst{21-19} = 0b001; // imm6 = 001xxx
4131 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4142 // Neon Shift-Insert vector operations,
4147 // 64-bit vector types.
4150 let Inst{21-19} = 0b001; // imm6 = 001xxx
4154 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4164 // 128-bit vector types.
4167 let Inst{21-19} = 0b001; // imm6 = 001xxx
4171 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4183 // 64-bit vector types.
4186 let Inst{21-19} = 0b001; // imm6 = 001xxx
4190 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4200 // 128-bit vector types.
4203 let Inst{21-19} = 0b001; // imm6 = 001xxx
4207 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4225 let Inst{21-19} = 0b001; // imm6 = 001xxx
4229 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4245 let Inst{21-19} = 0b001; // imm6 = 001xxx
4250 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4259 //===----------------------------------------------------------------------===//
4261 //===----------------------------------------------------------------------===//
4265 // VADD : Vector Add (integer and floating-point)
4324 // VMUL : Vector Multiply (integer, polynomial and floating-point)
4353 (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
4356 (DSubReg_i16_reg imm:$lane))),
4357 (SubReg_i16_lane imm:$lane)))>;
4359 (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
4362 (DSubReg_i32_reg imm:$lane))),
4363 (SubReg_i32_lane imm:$lane)))>;
4365 (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
4368 (DSubReg_i32_reg imm:$lane))),
4369 (SubReg_i32_lane imm:$lane)))>;
4371 (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
4374 (DSubReg_i16_reg imm:$lane))),
4375 (SubReg_i16_lane imm:$lane)))>;
4406 imm:$lane)))),
4409 (DSubReg_i16_reg imm:$lane))),
4410 (SubReg_i16_lane imm:$lane)))>;
4413 imm:$lane)))),
4416 (DSubReg_i32_reg imm:$lane))),
4417 (SubReg_i32_lane imm:$lane)))>;
4431 imm:$lane)))),
4434 (DSubReg_i16_reg imm:$lane))),
4435 (SubReg_i16_lane imm:$lane)))>;
4438 imm:$lane)))),
4441 (DSubReg_i32_reg imm:$lane))),
4442 (SubReg_i32_lane imm:$lane)))>;
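The VMUL-by-lane patterns just above handle a multiply where one operand is a duplicated lane of a Q register: the lane's containing D register is selected with DSubReg_*_reg and the lane within it with SubReg_*_lane, so the duplication folds into the instruction itself. In source terms this is simply vmul_lane_*:

#include <arm_neon.h>

/* VMUL.I16 d0, d1, d2[1]: multiply every lane of 'a' by lane 1 of 'v'. */
int16x4_t mul_by_lane(int16x4_t a, int16x4_t v) {
    return vmul_lane_s16(a, v, 1);
}

/* VMUL.F32 q0, q1, d2[0]: Q-register result, scalar taken from a D register. */
float32x4_t mulq_by_lane(float32x4_t a, float32x2_t v) {
    return vmulq_lane_f32(a, v, 0);
}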
4467 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
4469 // VMLA : Vector Multiply Accumulate (integer and floating-point)
4502 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4505 (DSubReg_i16_reg imm:$lane))),
4506 (SubReg_i16_lane imm:$lane)))>;
4510 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4513 (DSubReg_i32_reg imm:$lane))),
4514 (SubReg_i32_lane imm:$lane)))>;
4519 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4523 (DSubReg_i32_reg imm:$lane))),
4524 (SubReg_i32_lane imm:$lane)))>,
4537 // v8.1a Neon Rounding Double Multiply-Op vector operations,
4562 imm:$lane)))),
4564 imm:$lane))>;
4568 imm:$lane)))),
4570 imm:$lane))>;
4574 imm:$lane)))),
4579 (DSubReg_i16_reg imm:$lane))),
4580 (SubReg_i16_lane imm:$lane)))>;
4584 imm:$lane)))),
4589 (DSubReg_i32_reg imm:$lane))),
4590 (SubReg_i32_lane imm:$lane)))>;
4593 // (Q -= D * D)
4616 imm:$lane)))),
4617 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4621 imm:$lane)))),
4623 imm:$lane))>;
4627 imm:$lane)))),
4632 (DSubReg_i16_reg imm:$lane))),
4633 (SubReg_i16_lane imm:$lane)))>;
4637 imm:$lane)))),
4642 (DSubReg_i32_reg imm:$lane))),
4643 (SubReg_i32_lane imm:$lane)))>;
4662 imm:$lane)))))),
4663 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4667 imm:$lane)))))),
4668 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4671 // VMLS : Vector Multiply Subtract (integer and floating-point)
4704 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4707 (DSubReg_i16_reg imm:$lane))),
4708 (SubReg_i16_lane imm:$lane)))>;
4712 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4715 (DSubReg_i32_reg imm:$lane))),
4716 (SubReg_i32_lane imm:$lane)))>;
4721 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4724 (DSubReg_i32_reg imm:$lane))),
4725 (SubReg_i32_lane imm:$lane)))>,
4728 // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
4737 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
4754 imm:$lane)))))),
4755 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4759 imm:$lane)))))),
4760 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4763 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
4779 // Fused Vector Multiply Subtract (floating-point)
4842 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4844 bit lane;
4845 let Inst{5} = lane;
4846 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4857 VectorIndex32:$lane)))))),
4858 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
4889 (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm,
4891 bit lane;
4892 let Inst{5} = lane;
4893 let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
4903 VectorIndex32:$lane)))))),
4904 (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4914 VectorIndex32:$lane)))),
4916 (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4940 let Inst{24-23} = rot;
4956 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4958 bit lane;
4960 let Inst{21-20} = rot;
4961 let Inst{5} = lane;
4968 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4970 bit lane;
4972 let Inst{21-20} = rot;
4974 // This is needed because the lane operand does not have any bits in the
5031 VectorIndex32:$lane, complexrotateop:$rot),
5036 VectorIndex32:$lane, complexrotateop:$rot),
5042 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
5047 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
5080 // VSUB : Vector Subtract (integer and floating-point)
5093 // VSUBL : Vector Subtract Long (Q = D - D)
5098 // VSUBW : Vector Subtract Wide (Q = Q - D)
5115 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
5117 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
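In the VSUB family comments above, D and Q denote 64-bit and 128-bit operands: VSUBL widens before subtracting, VSUBW subtracts a narrow vector from a wide one, and VSUBHN/VRSUBHN narrow the result by keeping the high half of each lane. Illustrative intrinsics:

#include <arm_neon.h>

/* VSUBL.U8 (Q = D - D): widen 8-bit lanes to 16 bits, then subtract. */
uint16x8_t sub_widen(uint8x8_t a, uint8x8_t b) {
    return vsubl_u8(a, b);
}

/* VSUBHN.I16 (D = Q - Q): subtract, keep the high 8 bits of each lane. */
uint8x8_t sub_narrow_high(uint16x8_t a, uint16x8_t b) {
    return vsubhn_u16(a, b);
}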
5254 // Vd, Vs, Vs[0-15], Idx[0-1]
5261 let Inst{19-16} = Vn{4-1};
5264 let Inst{2-0} = Vm{3-1};
5267 // Vq, Vd, Vd[0-7], Idx[0-3]
5372 let Inst{10-9} = SIMM{10-9};
5390 let Inst{10-9} = SIMM{10-9};
5428 let Inst{10-9} = SIMM{10-9};
5446 let Inst{10-9} = SIMM{10-9};
5488 let Inst{11-8} = SIMM{11-8};
5495 let Inst{11-8} = SIMM{11-8};
5658 // VABDL : Vector Absolute Difference Long (Q = | D - D |)
5679 // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
5987 let Inst{21-16} = op21_16;
6153 // VNEG : Vector Negate (floating-point)
6256 let Inst{11-8} = SIMM{11-8};
6263 let Inst{11-8} = SIMM{11-8};
6292 // For last two instructions, for example, it should emit:
6313 // "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
6314 // "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
6334 // TODO: add "VMOV <-> VMVN" conversion for cases like
6335 // "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
6336 // "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
6338 // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
6342 // Even without these pseudo-insts we would probably end up with the correct
6357 // VMOV : Vector Get Lane (move scalar to ARM core register)
6360 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6361 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6363 imm:$lane))]> {
6364 let Inst{21} = lane{2};
6365 let Inst{6-5} = lane{1-0};
6368 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6369 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6371 imm:$lane))]> {
6372 let Inst{21} = lane{1};
6373 let Inst{6} = lane{0};
6376 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6377 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6379 imm:$lane))]> {
6380 let Inst{21} = lane{2};
6381 let Inst{6-5} = lane{1-0};
6384 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6385 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6387 imm:$lane))]> {
6388 let Inst{21} = lane{1};
6389 let Inst{6} = lane{0};
6392 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6393 IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6395 imm:$lane))]>,
6397 let Inst{21} = lane{0};
6400 def : InstAlias<"vmov${p} $R, $V$lane",
6401 (VGETLNi32 GPR:$R, DPR:$V, VectorIndex32:$lane, pred:$p), 0>,
6405 def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
6407 (DSubReg_i8_reg imm:$lane))),
6408 (SubReg_i8_lane imm:$lane))>;
6409 def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
6411 (DSubReg_i16_reg imm:$lane))),
6412 (SubReg_i16_lane imm:$lane))>;
6413 def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
6415 (DSubReg_i8_reg imm:$lane))),
6416 (SubReg_i8_lane imm:$lane))>;
6417 def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
6419 (DSubReg_i16_reg imm:$lane))),
6420 (SubReg_i16_lane imm:$lane))>;
6421 def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
6423 (DSubReg_i16_reg imm:$lane))),
6424 (SubReg_i16_lane imm:$lane))>;
6425 def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
6426 (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;
6427 def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
6429 (DSubReg_i16_reg imm:$lane))),
6430 (SubReg_i16_lane imm:$lane))>;
6431 def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
6432 (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
6434 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6436 (DSubReg_i32_reg imm:$lane))),
6437 (SubReg_i32_lane imm:$lane))>,
6439 def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6441 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6443 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6445 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6461 def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
6464 (SSubReg_f16_reg imm_even:$lane))>;
6465 def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
6468 (SSubReg_f16_reg imm_even:$lane))>;
6472 def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
6476 (SSubReg_f16_reg imm_odd:$lane))),
6478 def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
6482 (SSubReg_f16_reg imm_odd:$lane))),
6499 // Otherwise, if VMOVH is not available, resort to extracting the odd lane
6501 def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
6503 (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
6506 def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
6509 (DSubReg_i16_reg imm:$lane))),
6510 (SubReg_i16_lane imm:$lane)),
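The VGETLN definitions and extractelt patterns above implement "vector get lane" (VMOV Rd, Dn[x]); the signed and unsigned 8/16-bit variants differ only in how the element is extended into the core register, and Q-register sources are first narrowed to the containing D register via DSubReg_*/SubReg_*_lane. Intrinsic view:

#include <arm_neon.h>

/* VMOV.U8 r0, d0[5]: move lane 5 to a core register, zero-extended. */
uint8_t get_byte5(uint8x8_t v) {
    return vget_lane_u8(v, 5);
}

/* Q-register source, lane 6 of eight 16-bit lanes. */
int16_t get_half6(int16x8_t v) {
    return vgetq_lane_s16(v, 6);
}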
6514 // VMOV : Vector Set Lane (move ARM core register to scalar)
6518 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6519 IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6521 GPR:$R, imm:$lane))]> {
6522 let Inst{21} = lane{2};
6523 let Inst{6-5} = lane{1-0};
6526 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6527 IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6529 GPR:$R, imm:$lane))]> {
6530 let Inst{21} = lane{1};
6531 let Inst{6} = lane{0};
6534 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6535 IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6537 GPR:$R, imm:$lane))]>,
6539 let Inst{21} = lane{0};
6546 def : InstAlias<"vmov${p} $V$lane, $R",
6547 (VSETLNi32 DPR:$V, GPR:$R, VectorIndex32:$lane, pred:$p), 0>,
6553 def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
6555 (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
6556 def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
6559 (DSubReg_i16_reg imm:$lane))),
6561 (SubReg_i16_lane imm:$lane))),
6562 (DSubReg_i16_reg imm:$lane)))>;
6566 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6569 (DSubReg_i8_reg imm:$lane))),
6570 GPR:$src2, (SubReg_i8_lane imm:$lane))),
6571 (DSubReg_i8_reg imm:$lane)))>;
6572 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6575 (DSubReg_i16_reg imm:$lane))),
6576 GPR:$src2, (SubReg_i16_lane imm:$lane))),
6577 (DSubReg_i16_reg imm:$lane)))>;
6578 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6581 (DSubReg_i32_reg imm:$lane))),
6582 GPR:$src2, (SubReg_i32_lane imm:$lane))),
6583 (DSubReg_i32_reg imm:$lane)))>;
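VSETLN is the opposite direction (VMOV Dd[x], Rn): a core register value replaces one lane while the rest of the vector is preserved, hence the tied $src1 operand in the definitions above. Sketch:

#include <arm_neon.h>

/* VMOV.16 d0[2], r0: overwrite lane 2 only. */
uint16x4_t set_half2(uint16_t x, uint16x4_t v) {
    return vset_lane_u16(x, v, 2);
}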
6658 // ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
6664 // VDUP : Vector Duplicate Lane (from scalar to all elements)
6668 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6669 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6670 [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6674 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6675 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6677 VectorIndex32:$lane)))]>;
6679 // Inst{19-16} is partially specified depending on the element size.
6682 bits<3> lane;
6683 let Inst{19-17} = lane{2-0};
6686 bits<2> lane;
6687 let Inst{19-18} = lane{1-0};
6690 bits<1> lane;
6691 let Inst{19} = lane{0};
6694 bits<3> lane;
6695 let Inst{19-17} = lane{2-0};
6698 bits<2> lane;
6699 let Inst{19-18} = lane{1-0};
6702 bits<1> lane;
6703 let Inst{19} = lane{0};
6707 def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
6708 (VDUPLN16d DPR:$Vm, imm:$lane)>;
6710 def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6711 (VDUPLN32d DPR:$Vm, imm:$lane)>;
6713 def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6714 (VDUPLN32q DPR:$Vm, imm:$lane)>;
6716 def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
6718 (DSubReg_i8_reg imm:$lane))),
6719 (SubReg_i8_lane imm:$lane)))>;
6720 def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
6722 (DSubReg_i16_reg imm:$lane))),
6723 (SubReg_i16_lane imm:$lane)))>;
6724 def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
6726 (DSubReg_i16_reg imm:$lane))),
6727 (SubReg_i16_lane imm:$lane)))>;
6728 def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
6730 (DSubReg_i32_reg imm:$lane))),
6731 (SubReg_i32_lane imm:$lane)))>;
6732 def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
6734 (DSubReg_i32_reg imm:$lane))),
6735 (SubReg_i32_lane imm:$lane)))>;
6752 def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
6753 (VDUPLN16d DPR:$Vm, imm:$lane)>;
6755 def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
6757 (DSubReg_i16_reg imm:$lane))),
6758 (SubReg_i16_lane imm:$lane)))>;
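The VDUPLN classes and ARMvduplane patterns above broadcast one existing lane to every lane of the destination; as the comment at line 6679 notes, Inst{19-16} is only partially specified because it encodes both the element size and the lane index. Intrinsic equivalent (names illustrative):

#include <arm_neon.h>

/* VDUP.16 d0, d1[3]: replicate lane 3 across all four 16-bit lanes. */
uint16x4_t splat_lane3(uint16x4_t v) {
    return vdup_lane_u16(v, 3);
}

/* VDUP.32 q0, d1[0]: D-register source, Q-register destination. */
float32x4_t splat_lane0_q(float32x2_t v) {
    return vdupq_lane_f32(v, 0);
}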
6790 // VCVT : Vector Convert Between Floating-Point and Integers
6867 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
6947 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
6962 // VREV64 : Vector Reverse elements within 64-bit doublewords
6999 // VREV32 : Vector Reverse elements within 32-bit words
7029 // VREV16 : Vector Reverse elements within 16-bit halfwords
7065 // All of these have a two-operand InstAlias.
7075 let Inst{10-8} = index{2-0};
7085 let Inst{11-8} = index{3-0};
7090 let Inst{10-8} = index{2-0};
7093 let Inst{10-9} = index{1-0};
7105 let Inst{9-8} = 0b00;
7113 let Inst{11-8} = index{3-0};
7116 let Inst{11-9} = index{2-0};
7127 let Inst{11-10} = index{1-0};
7128 let Inst{9-8} = 0b00;
7132 let Inst{10-8} = 0b000;
7153 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
7165 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
7288 let Inst{9-7} = op9_7;
7293 let Inst{9-7} = op9_7;
7299 let Inst{9-7} = op9_7;
7305 let Inst{9-7} = op9_7;
7402 //===----------------------------------------------------------------------===//
7403 // NEON instructions for single-precision FP math
7404 //===----------------------------------------------------------------------===//
7489 def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7490 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7491 def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7492 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7493 def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7494 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7495 def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7496 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7499 // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
7507 //===----------------------------------------------------------------------===//
7508 // Non-Instruction Patterns or Endianness - Revert Patterns
7509 //===----------------------------------------------------------------------===//
7780 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
7793 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7794 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
7956 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
8027 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
8028 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
8029 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
8032 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
8033 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
8035 // Double lengthening - v4i8 -> v4i16 -> v4i32
8037 // v2i8 -> v2i16 -> v2i32
8039 // v2i16 -> v2i32 -> v2i64
8044 defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
8045 defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
8047 // Double lengthening - v4i8 -> v4i16 -> v4i32
8049 // v2i8 -> v2i16 -> v2i32
8051 // v2i16 -> v2i32 -> v2i64
8055 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
8108 //===----------------------------------------------------------------------===//
8134 // ... two-operand aliases
8158 // VLD1 single-lane pseudo-instructions. These need special handling for
8159 // the lane index that an InstAlias can't handle, so we use these instead.
8196 // VST1 single-lane pseudo-instructions. These need special handling for
8197 // the lane index that an InstAlias can't handle, so we use these instead.
8233 // VLD2 single-lane pseudo-instructions. These need special handling for
8234 // the lane index that an InstAlias can't handle, so we use these instead.
8292 // VST2 single-lane pseudo-instructions. These need special handling for
8293 // the lane index that an InstAlias can't handle, so we use these instead.
8351 // VLD3 all-lanes pseudo-instructions. These need special handling for
8352 // the lane index that an InstAlias can't handle, so we use these instead.
8422 // VLD3 single-lane pseudo-instructions. These need special handling for
8423 // the lane index that an InstAlias can't handle, so we use these instead.
8481 // VLD3 multiple structure pseudo-instructions. These need special handling for
8540 // VST3 single-lane pseudo-instructions. These need special handling for
8541 // the lane index that an InstAlias can't handle, so we use these instead.
8600 // VST3 multiple structure pseudo-instructions. These need special handling for
8659 // VLD4 all-lanes pseudo-instructions. These need special handling for
8660 // the lane index that an InstAlias can't handle, so we use these instead.
8730 // VLD4 single-lane pseudo-instructions. These need special handling for
8731 // the lane index that an InstAlias can't handle, so we use these instead.
8791 // VLD4 multiple structure pseudo-instructions. These need special handling for
8862 // VST4 single-lane pseudo-instructions. These need special handling for
8863 // the lane index that an InstAlias can't handle, so we use these instead.
8922 // VST4 multiple structure pseudo-instructions. These need special handling for
9005 // D-register versions.
9023 // Q-register versions.
9043 // D-register versions.
9061 // Q-register versions.
9100 // "vmov Rd, #-imm" can be handled via "vmvn".
9110 // 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
9186 DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
9187 bit lane;
9188 let Inst{5} = lane;
9190 let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
9199 VectorIndex32:$lane)))))),
9200 (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
9254 VectorIndex16:$lane)))),
9258 (DSubReg_i16_reg VectorIndex16:$lane)),
9259 (SubReg_i16_lane VectorIndex16:$lane))>;