1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "MCTargetDesc/LoongArchBaseInfo.h"
20 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/IntrinsicsLoongArch.h"
30 #include "llvm/Support/CodeGen.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include <llvm/Analysis/VectorUtils.h>
36
37 using namespace llvm;
38
39 #define DEBUG_TYPE "loongarch-isel-lowering"
40
41 STATISTIC(NumTailCalls, "Number of tail calls");
42
// Hidden boolean command-line option "loongarch-check-zero-division",
// defaulting to false. Per its description it requests a trap on integer
// division by zero; the code that reads the flag is not in this part of the
// file.
static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
46
// Builds the lowering configuration for one subtarget. In order, this:
//   1. registers the register classes that are available (GPR always; FPR32,
//      FPR64, LSX128 and LASX256 depending on subtarget features),
//   2. records the legalization action for each (opcode, type) pair,
//   3. registers the opcodes for which target DAG combines are requested,
//   4. sets derived and miscellaneous properties (register properties, stack
//      pointer register, boolean contents, atomic/cmpxchg sizes, alignments).
//
// Statement order matters: several later calls intentionally override actions
// set earlier (for example the blanket vector Expand followed by selective
// Legal/Custom, and scalar CTPOP being re-marked under LSX).
//
// TM is forwarded to the TargetLowering base; STI is stored as Subtarget, so
// STI and Subtarget refer to the same object throughout.
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  // Vector types placed in the LSX128 / LASX256 register classes below. The
  // same lists drive the per-type action setup in the LSX and LASX sections.
  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  // Scalar CTPOP is expanded by default; the LSX section below re-marks it
  // Legal when LSX is available.
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // BITREV/REVB requires the 32S feature.
  if (STI.has32S()) {
    // Expand bitreverse.i16 with native-width bitrev and shift for now, before
    // we get to know which of sll and revb.2h is faster.
    setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
    setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

    // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
    // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
    // and i32 could still be byte-swapped relatively cheaply.
    setOperationAction(ISD::BSWAP, MVT::i16, Custom);
  } else {
    // Without 32S these native-width operations are expanded, SELECT is
    // custom-lowered, and in-register sign extension from i8/i16 is expanded.
    setOperationAction(ISD::BSWAP, GRLenVT, Expand);
    setOperationAction(ISD::CTTZ, GRLenVT, Expand);
    setOperationAction(ISD::CTLZ, GRLenVT, Expand);
    setOperationAction(ISD::ROTR, GRLenVT, Expand);
    setOperationAction(ISD::SELECT, GRLenVT, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    // On LA64 the listed operations are custom-handled for i32.
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                       Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    // On LA32 the listed operations are custom-handled for i64.
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    if (Subtarget.hasBasicD())
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Floating-point condition codes that are marked Expand for f32 and f64 in
  // the 'F' and 'D' sections below.
  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FCANONICALIZE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    // Half/bfloat conversions: LibCall under the soft-float ABI, otherwise
    // Custom. BF16_TO_FP is Custom regardless of ABI.
    setOperationAction(ISD::FP16_TO_FP, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    // 'F' without 'D': extra custom handling for integer<->FP conversions.
    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FCANONICALIZE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    // Unlike the f32 case above, FP16_TO_FP for f64 is always Expand.
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    // Actions common to every 128-bit LSX vector type.
    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
    }
    // Integer LSX vector types.
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    // BITREVERSE is Custom for all integer LSX types except v2i64; BSWAP is
    // Legal for all except v16i8.
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    // Floating-point LSX vector types.
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
    // Scalar operations that become Legal once LSX is present. The CTPOP line
    // overrides the Expand action set for GRLenVT near the top.
    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);

    // TRUNCATE is Custom for these result types, which include vector types
    // narrower than 128 bits that are not in LSXVTs.
    for (MVT VT :
         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    // Actions common to every 256-bit LASX vector type.
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    // Integer LASX vector types.
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    // Note that this list also contains the 128-bit type v4i32.
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    // Floating-point LASX vector types.
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::BITCAST);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Baseline atomic limits: up to GRLen bits, cmpxchg no narrower than 32
  // bits. Both may be adjusted by the LAMCAS / SCQ checks at the end.
  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  // With SCQ the atomic size limit is raised to 128 bits and i128
  // compare-and-swap is custom-lowered.
  if (Subtarget.hasSCQ()) {
    setMaxAtomicSizeInBitsSupported(128);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
  }
}
443
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const444 bool LoongArchTargetLowering::isOffsetFoldingLegal(
445 const GlobalAddressSDNode *GA) const {
446 // In order to maximise the opportunity for common subexpression elimination,
447 // keep a separate ADD node for the global address offset instead of folding
448 // it in the global address node. Later peephole optimisations may choose to
449 // fold it back in when profitable.
450 return false;
451 }
452
LowerOperation(SDValue Op,SelectionDAG & DAG) const453 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
454 SelectionDAG &DAG) const {
455 switch (Op.getOpcode()) {
456 case ISD::ATOMIC_FENCE:
457 return lowerATOMIC_FENCE(Op, DAG);
458 case ISD::EH_DWARF_CFA:
459 return lowerEH_DWARF_CFA(Op, DAG);
460 case ISD::GlobalAddress:
461 return lowerGlobalAddress(Op, DAG);
462 case ISD::GlobalTLSAddress:
463 return lowerGlobalTLSAddress(Op, DAG);
464 case ISD::INTRINSIC_WO_CHAIN:
465 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
466 case ISD::INTRINSIC_W_CHAIN:
467 return lowerINTRINSIC_W_CHAIN(Op, DAG);
468 case ISD::INTRINSIC_VOID:
469 return lowerINTRINSIC_VOID(Op, DAG);
470 case ISD::BlockAddress:
471 return lowerBlockAddress(Op, DAG);
472 case ISD::JumpTable:
473 return lowerJumpTable(Op, DAG);
474 case ISD::SHL_PARTS:
475 return lowerShiftLeftParts(Op, DAG);
476 case ISD::SRA_PARTS:
477 return lowerShiftRightParts(Op, DAG, true);
478 case ISD::SRL_PARTS:
479 return lowerShiftRightParts(Op, DAG, false);
480 case ISD::ConstantPool:
481 return lowerConstantPool(Op, DAG);
482 case ISD::FP_TO_SINT:
483 return lowerFP_TO_SINT(Op, DAG);
484 case ISD::BITCAST:
485 return lowerBITCAST(Op, DAG);
486 case ISD::UINT_TO_FP:
487 return lowerUINT_TO_FP(Op, DAG);
488 case ISD::SINT_TO_FP:
489 return lowerSINT_TO_FP(Op, DAG);
490 case ISD::VASTART:
491 return lowerVASTART(Op, DAG);
492 case ISD::FRAMEADDR:
493 return lowerFRAMEADDR(Op, DAG);
494 case ISD::RETURNADDR:
495 return lowerRETURNADDR(Op, DAG);
496 case ISD::WRITE_REGISTER:
497 return lowerWRITE_REGISTER(Op, DAG);
498 case ISD::INSERT_VECTOR_ELT:
499 return lowerINSERT_VECTOR_ELT(Op, DAG);
500 case ISD::EXTRACT_VECTOR_ELT:
501 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
502 case ISD::BUILD_VECTOR:
503 return lowerBUILD_VECTOR(Op, DAG);
504 case ISD::CONCAT_VECTORS:
505 return lowerCONCAT_VECTORS(Op, DAG);
506 case ISD::VECTOR_SHUFFLE:
507 return lowerVECTOR_SHUFFLE(Op, DAG);
508 case ISD::BITREVERSE:
509 return lowerBITREVERSE(Op, DAG);
510 case ISD::SCALAR_TO_VECTOR:
511 return lowerSCALAR_TO_VECTOR(Op, DAG);
512 case ISD::PREFETCH:
513 return lowerPREFETCH(Op, DAG);
514 case ISD::SELECT:
515 return lowerSELECT(Op, DAG);
516 case ISD::FP_TO_FP16:
517 return lowerFP_TO_FP16(Op, DAG);
518 case ISD::FP16_TO_FP:
519 return lowerFP16_TO_FP(Op, DAG);
520 case ISD::FP_TO_BF16:
521 return lowerFP_TO_BF16(Op, DAG);
522 case ISD::BF16_TO_FP:
523 return lowerBF16_TO_FP(Op, DAG);
524 }
525 return SDValue();
526 }
527
lowerPREFETCH(SDValue Op,SelectionDAG & DAG) const528 SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
529 SelectionDAG &DAG) const {
530 unsigned IsData = Op.getConstantOperandVal(4);
531
532 // We don't support non-data prefetch.
533 // Just preserve the chain.
534 if (!IsData)
535 return Op.getOperand(0);
536
537 return Op;
538 }
539
540 // Return true if Val is equal to (setcc LHS, RHS, CC).
541 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
542 // Otherwise, return std::nullopt.
matchSetCC(SDValue LHS,SDValue RHS,ISD::CondCode CC,SDValue Val)543 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
544 ISD::CondCode CC, SDValue Val) {
545 assert(Val->getOpcode() == ISD::SETCC);
546 SDValue LHS2 = Val.getOperand(0);
547 SDValue RHS2 = Val.getOperand(1);
548 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
549
550 if (LHS == LHS2 && RHS == RHS2) {
551 if (CC == CC2)
552 return true;
553 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
554 return false;
555 } else if (LHS == RHS2 && RHS == LHS2) {
556 CC2 = ISD::getSetCCSwappedOperands(CC2);
557 if (CC == CC2)
558 return true;
559 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
560 return false;
561 }
562
563 return std::nullopt;
564 }
565
combineSelectToBinOp(SDNode * N,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget)566 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
567 const LoongArchSubtarget &Subtarget) {
568 SDValue CondV = N->getOperand(0);
569 SDValue TrueV = N->getOperand(1);
570 SDValue FalseV = N->getOperand(2);
571 MVT VT = N->getSimpleValueType(0);
572 SDLoc DL(N);
573
574 // (select c, -1, y) -> -c | y
575 if (isAllOnesConstant(TrueV)) {
576 SDValue Neg = DAG.getNegative(CondV, DL, VT);
577 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
578 }
579 // (select c, y, -1) -> (c-1) | y
580 if (isAllOnesConstant(FalseV)) {
581 SDValue Neg =
582 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
583 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
584 }
585
586 // (select c, 0, y) -> (c-1) & y
587 if (isNullConstant(TrueV)) {
588 SDValue Neg =
589 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
590 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
591 }
592 // (select c, y, 0) -> -c & y
593 if (isNullConstant(FalseV)) {
594 SDValue Neg = DAG.getNegative(CondV, DL, VT);
595 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
596 }
597
598 // select c, ~x, x --> xor -c, x
599 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
600 const APInt &TrueVal = TrueV->getAsAPIntVal();
601 const APInt &FalseVal = FalseV->getAsAPIntVal();
602 if (~TrueVal == FalseVal) {
603 SDValue Neg = DAG.getNegative(CondV, DL, VT);
604 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
605 }
606 }
607
608 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
609 // when both truev and falsev are also setcc.
610 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
611 FalseV.getOpcode() == ISD::SETCC) {
612 SDValue LHS = CondV.getOperand(0);
613 SDValue RHS = CondV.getOperand(1);
614 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
615
616 // (select x, x, y) -> x | y
617 // (select !x, x, y) -> x & y
618 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
619 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
620 DAG.getFreeze(FalseV));
621 }
622 // (select x, y, x) -> x & y
623 // (select !x, y, x) -> x | y
624 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
625 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
626 DAG.getFreeze(TrueV), FalseV);
627 }
628 }
629
630 return SDValue();
631 }
632
633 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
634 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
635 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
636 // being `0` or `-1`. In such cases we can replace `select` with `and`.
637 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
638 // than `c0`?
639 static SDValue
foldBinOpIntoSelectIfProfitable(SDNode * BO,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget)640 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
641 const LoongArchSubtarget &Subtarget) {
642 unsigned SelOpNo = 0;
643 SDValue Sel = BO->getOperand(0);
644 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
645 SelOpNo = 1;
646 Sel = BO->getOperand(1);
647 }
648
649 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
650 return SDValue();
651
652 unsigned ConstSelOpNo = 1;
653 unsigned OtherSelOpNo = 2;
654 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
655 ConstSelOpNo = 2;
656 OtherSelOpNo = 1;
657 }
658 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
659 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
660 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
661 return SDValue();
662
663 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
664 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
665 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
666 return SDValue();
667
668 SDLoc DL(Sel);
669 EVT VT = BO->getValueType(0);
670
671 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
672 if (SelOpNo == 1)
673 std::swap(NewConstOps[0], NewConstOps[1]);
674
675 SDValue NewConstOp =
676 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
677 if (!NewConstOp)
678 return SDValue();
679
680 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
681 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
682 return SDValue();
683
684 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
685 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
686 if (SelOpNo == 1)
687 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
688 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
689
690 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
691 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
692 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
693 }
694
695 // Changes the condition code and swaps operands if necessary, so the SetCC
696 // operation matches one of the comparisons supported directly by branches
697 // in the LoongArch ISA. May adjust compares to favor compare with 0 over
698 // compare with 1/-1.
translateSetCCForBranch(const SDLoc & DL,SDValue & LHS,SDValue & RHS,ISD::CondCode & CC,SelectionDAG & DAG)699 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
700 ISD::CondCode &CC, SelectionDAG &DAG) {
701 // If this is a single bit test that can't be handled by ANDI, shift the
702 // bit to be tested to the MSB and perform a signed compare with 0.
703 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
704 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
705 isa<ConstantSDNode>(LHS.getOperand(1))) {
706 uint64_t Mask = LHS.getConstantOperandVal(1);
707 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
708 unsigned ShAmt = 0;
709 if (isPowerOf2_64(Mask)) {
710 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
711 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
712 } else {
713 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
714 }
715
716 LHS = LHS.getOperand(0);
717 if (ShAmt != 0)
718 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
719 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
720 return;
721 }
722 }
723
724 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
725 int64_t C = RHSC->getSExtValue();
726 switch (CC) {
727 default:
728 break;
729 case ISD::SETGT:
730 // Convert X > -1 to X >= 0.
731 if (C == -1) {
732 RHS = DAG.getConstant(0, DL, RHS.getValueType());
733 CC = ISD::SETGE;
734 return;
735 }
736 break;
737 case ISD::SETLT:
738 // Convert X < 1 to 0 >= X.
739 if (C == 1) {
740 RHS = LHS;
741 LHS = DAG.getConstant(0, DL, RHS.getValueType());
742 CC = ISD::SETGE;
743 return;
744 }
745 break;
746 }
747 }
748
749 switch (CC) {
750 default:
751 break;
752 case ISD::SETGT:
753 case ISD::SETLE:
754 case ISD::SETUGT:
755 case ISD::SETULE:
756 CC = ISD::getSetCCSwappedOperands(CC);
757 std::swap(LHS, RHS);
758 break;
759 }
760 }
761
lowerSELECT(SDValue Op,SelectionDAG & DAG) const762 SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
763 SelectionDAG &DAG) const {
764 SDValue CondV = Op.getOperand(0);
765 SDValue TrueV = Op.getOperand(1);
766 SDValue FalseV = Op.getOperand(2);
767 SDLoc DL(Op);
768 MVT VT = Op.getSimpleValueType();
769 MVT GRLenVT = Subtarget.getGRLenVT();
770
771 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
772 return V;
773
774 if (Op.hasOneUse()) {
775 unsigned UseOpc = Op->user_begin()->getOpcode();
776 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
777 SDNode *BinOp = *Op->user_begin();
778 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
779 DAG, Subtarget)) {
780 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
781 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
782 // may return a constant node and cause crash in lowerSELECT.
783 if (NewSel.getOpcode() == ISD::SELECT)
784 return lowerSELECT(NewSel, DAG);
785 return NewSel;
786 }
787 }
788 }
789
790 // If the condition is not an integer SETCC which operates on GRLenVT, we need
791 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
792 // (select condv, truev, falsev)
793 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
794 if (CondV.getOpcode() != ISD::SETCC ||
795 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
796 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
797 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
798
799 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
800
801 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
802 }
803
804 // If the CondV is the output of a SETCC node which operates on GRLenVT
805 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
806 // to take advantage of the integer compare+branch instructions. i.e.: (select
807 // (setcc lhs, rhs, cc), truev, falsev)
808 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
809 SDValue LHS = CondV.getOperand(0);
810 SDValue RHS = CondV.getOperand(1);
811 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
812
813 // Special case for a select of 2 constants that have a difference of 1.
814 // Normally this is done by DAGCombine, but if the select is introduced by
815 // type legalization or op legalization, we miss it. Restricting to SETLT
816 // case for now because that is what signed saturating add/sub need.
817 // FIXME: We don't need the condition to be SETLT or even a SETCC,
818 // but we would probably want to swap the true/false values if the condition
819 // is SETGE/SETLE to avoid an XORI.
820 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
821 CCVal == ISD::SETLT) {
822 const APInt &TrueVal = TrueV->getAsAPIntVal();
823 const APInt &FalseVal = FalseV->getAsAPIntVal();
824 if (TrueVal - 1 == FalseVal)
825 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
826 if (TrueVal + 1 == FalseVal)
827 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
828 }
829
830 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
831 // 1 < x ? x : 1 -> 0 < x ? x : 1
832 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
833 RHS == TrueV && LHS == FalseV) {
834 LHS = DAG.getConstant(0, DL, VT);
835 // 0 <u x is the same as x != 0.
836 if (CCVal == ISD::SETULT) {
837 std::swap(LHS, RHS);
838 CCVal = ISD::SETNE;
839 }
840 }
841
842 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
843 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
844 RHS == FalseV) {
845 RHS = DAG.getConstant(0, DL, VT);
846 }
847
848 SDValue TargetCC = DAG.getCondCode(CCVal);
849
850 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
851 // (select (setcc lhs, rhs, CC), constant, falsev)
852 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
853 std::swap(TrueV, FalseV);
854 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
855 }
856
857 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
858 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
859 }
860
861 SDValue
lowerSCALAR_TO_VECTOR(SDValue Op,SelectionDAG & DAG) const862 LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
863 SelectionDAG &DAG) const {
864 SDLoc DL(Op);
865 MVT OpVT = Op.getSimpleValueType();
866
867 SDValue Vector = DAG.getUNDEF(OpVT);
868 SDValue Val = Op.getOperand(0);
869 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
870
871 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
872 }
873
lowerBITREVERSE(SDValue Op,SelectionDAG & DAG) const874 SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
875 SelectionDAG &DAG) const {
876 EVT ResTy = Op->getValueType(0);
877 SDValue Src = Op->getOperand(0);
878 SDLoc DL(Op);
879
880 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
881 unsigned int OrigEltNum = ResTy.getVectorNumElements();
882 unsigned int NewEltNum = NewVT.getVectorNumElements();
883
884 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
885
886 SmallVector<SDValue, 8> Ops;
887 for (unsigned int i = 0; i < NewEltNum; i++) {
888 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
889 DAG.getConstant(i, DL, MVT::i64));
890 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
891 ? (unsigned)LoongArchISD::BITREV_8B
892 : (unsigned)ISD::BITREVERSE;
893 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
894 }
895 SDValue Res =
896 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
897
898 switch (ResTy.getSimpleVT().SimpleTy) {
899 default:
900 return SDValue();
901 case MVT::v16i8:
902 case MVT::v32i8:
903 return Res;
904 case MVT::v8i16:
905 case MVT::v16i16:
906 case MVT::v4i32:
907 case MVT::v8i32: {
908 SmallVector<int, 32> Mask;
909 for (unsigned int i = 0; i < NewEltNum; i++)
910 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
911 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
912 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
913 }
914 }
915 }
916
917 // Widen element type to get a new mask value (if possible).
918 // For example:
919 // shufflevector <4 x i32> %a, <4 x i32> %b,
920 // <4 x i32> <i32 6, i32 7, i32 2, i32 3>
921 // is equivalent to:
922 // shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
923 // can be lowered to:
924 // VPACKOD_D vr0, vr0, vr1
widenShuffleMask(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)925 static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
926 SDValue V1, SDValue V2, SelectionDAG &DAG) {
927 unsigned EltBits = VT.getScalarSizeInBits();
928
929 if (EltBits > 32 || EltBits == 1)
930 return SDValue();
931
932 SmallVector<int, 8> NewMask;
933 if (widenShuffleMaskElts(Mask, NewMask)) {
934 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
935 : MVT::getIntegerVT(EltBits * 2);
936 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
937 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
938 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
939 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
940 return DAG.getBitcast(
941 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
942 }
943 }
944
945 return SDValue();
946 }
947
948 /// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
949 /// instruction.
950 // The funciton matches elements from one of the input vector shuffled to the
951 // left or right with zeroable elements 'shifted in'. It handles both the
952 // strictly bit-wise element shifts and the byte shfit across an entire 128-bit
953 // lane.
954 // Mostly copied from X86.
matchShuffleAsShift(MVT & ShiftVT,unsigned & Opcode,unsigned ScalarSizeInBits,ArrayRef<int> Mask,int MaskOffset,const APInt & Zeroable)955 static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
956 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
957 int MaskOffset, const APInt &Zeroable) {
958 int Size = Mask.size();
959 unsigned SizeInBits = Size * ScalarSizeInBits;
960
961 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
962 for (int i = 0; i < Size; i += Scale)
963 for (int j = 0; j < Shift; ++j)
964 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
965 return false;
966
967 return true;
968 };
969
970 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
971 int Step = 1) {
972 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
973 if (!(Mask[i] == -1 || Mask[i] == Low))
974 return false;
975 return true;
976 };
977
978 auto MatchShift = [&](int Shift, int Scale, bool Left) {
979 for (int i = 0; i != Size; i += Scale) {
980 unsigned Pos = Left ? i + Shift : i;
981 unsigned Low = Left ? i : i + Shift;
982 unsigned Len = Scale - Shift;
983 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
984 return -1;
985 }
986
987 int ShiftEltBits = ScalarSizeInBits * Scale;
988 bool ByteShift = ShiftEltBits > 64;
989 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
990 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
991 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
992
993 // Normalize the scale for byte shifts to still produce an i64 element
994 // type.
995 Scale = ByteShift ? Scale / 2 : Scale;
996
997 // We need to round trip through the appropriate type for the shift.
998 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
999 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1000 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1001 return (int)ShiftAmt;
1002 };
1003
1004 unsigned MaxWidth = 128;
1005 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1006 for (int Shift = 1; Shift != Scale; ++Shift)
1007 for (bool Left : {true, false})
1008 if (CheckZeros(Shift, Scale, Left)) {
1009 int ShiftAmt = MatchShift(Shift, Scale, Left);
1010 if (0 < ShiftAmt)
1011 return ShiftAmt;
1012 }
1013
1014 // no match
1015 return -1;
1016 }
1017
1018 /// Lower VECTOR_SHUFFLE as shift (if possible).
1019 ///
1020 /// For example:
1021 /// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1022 /// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1023 /// is lowered to:
1024 /// (VBSLL_V $v0, $v0, 4)
1025 ///
1026 /// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1027 /// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1028 /// is lowered to:
1029 /// (VSLLI_D $v0, $v0, 32)
lowerVECTOR_SHUFFLEAsShift(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG,const APInt & Zeroable)1030 static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1031 MVT VT, SDValue V1, SDValue V2,
1032 SelectionDAG &DAG,
1033 const APInt &Zeroable) {
1034 int Size = Mask.size();
1035 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1036
1037 MVT ShiftVT;
1038 SDValue V = V1;
1039 unsigned Opcode;
1040
1041 // Try to match shuffle against V1 shift.
1042 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1043 Mask, 0, Zeroable);
1044
1045 // If V1 failed, try to match shuffle against V2 shift.
1046 if (ShiftAmt < 0) {
1047 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1048 Mask, Size, Zeroable);
1049 V = V2;
1050 }
1051
1052 if (ShiftAmt < 0)
1053 return SDValue();
1054
1055 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1056 "Illegal integer vector type");
1057 V = DAG.getBitcast(ShiftVT, V);
1058 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1059 DAG.getConstant(ShiftAmt, DL, MVT::i64));
1060 return DAG.getBitcast(VT, V);
1061 }
1062
1063 /// Determine whether a range fits a regular pattern of values.
1064 /// This function accounts for the possibility of jumping over the End iterator.
1065 template <typename ValType>
1066 static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,unsigned CheckStride,typename SmallVectorImpl<ValType>::const_iterator End,ValType ExpectedIndex,unsigned ExpectedIndexStride)1067 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1068 unsigned CheckStride,
1069 typename SmallVectorImpl<ValType>::const_iterator End,
1070 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1071 auto &I = Begin;
1072
1073 while (I != End) {
1074 if (*I != -1 && *I != ExpectedIndex)
1075 return false;
1076 ExpectedIndex += ExpectedIndexStride;
1077
1078 // Incrementing past End is undefined behaviour so we must increment one
1079 // step at a time and check for End at each step.
1080 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1081 ; // Empty loop body.
1082 }
1083 return true;
1084 }
1085
1086 /// Compute whether each element of a shuffle is zeroable.
1087 ///
1088 /// A "zeroable" vector shuffle element is one which can be lowered to zero.
computeZeroableShuffleElements(ArrayRef<int> Mask,SDValue V1,SDValue V2,APInt & KnownUndef,APInt & KnownZero)1089 static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1090 SDValue V2, APInt &KnownUndef,
1091 APInt &KnownZero) {
1092 int Size = Mask.size();
1093 KnownUndef = KnownZero = APInt::getZero(Size);
1094
1095 V1 = peekThroughBitcasts(V1);
1096 V2 = peekThroughBitcasts(V2);
1097
1098 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1099 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1100
1101 int VectorSizeInBits = V1.getValueSizeInBits();
1102 int ScalarSizeInBits = VectorSizeInBits / Size;
1103 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1104 (void)ScalarSizeInBits;
1105
1106 for (int i = 0; i < Size; ++i) {
1107 int M = Mask[i];
1108 if (M < 0) {
1109 KnownUndef.setBit(i);
1110 continue;
1111 }
1112 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1113 KnownZero.setBit(i);
1114 continue;
1115 }
1116 }
1117 }
1118
1119 /// Test whether a shuffle mask is equivalent within each sub-lane.
1120 ///
1121 /// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1122 /// non-trivial to compute in the face of undef lanes. The representation is
1123 /// suitable for use with existing 128-bit shuffles as entries from the second
1124 /// vector have been remapped to [LaneSize, 2*LaneSize).
isRepeatedShuffleMask(unsigned LaneSizeInBits,MVT VT,ArrayRef<int> Mask,SmallVectorImpl<int> & RepeatedMask)1125 static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1126 ArrayRef<int> Mask,
1127 SmallVectorImpl<int> &RepeatedMask) {
1128 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1129 RepeatedMask.assign(LaneSize, -1);
1130 int Size = Mask.size();
1131 for (int i = 0; i < Size; ++i) {
1132 assert(Mask[i] == -1 || Mask[i] >= 0);
1133 if (Mask[i] < 0)
1134 continue;
1135 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1136 // This entry crosses lanes, so there is no way to model this shuffle.
1137 return false;
1138
1139 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1140 // Adjust second vector indices to start at LaneSize instead of Size.
1141 int LocalM =
1142 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1143 if (RepeatedMask[i % LaneSize] < 0)
1144 // This is the first non-undef entry in this slot of a 128-bit lane.
1145 RepeatedMask[i % LaneSize] = LocalM;
1146 else if (RepeatedMask[i % LaneSize] != LocalM)
1147 // Found a mismatch with the repeated mask.
1148 return false;
1149 }
1150 return true;
1151 }
1152
1153 /// Attempts to match vector shuffle as byte rotation.
matchShuffleAsByteRotate(MVT VT,SDValue & V1,SDValue & V2,ArrayRef<int> Mask)1154 static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1155 ArrayRef<int> Mask) {
1156
1157 SDValue Lo, Hi;
1158 SmallVector<int, 16> RepeatedMask;
1159
1160 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1161 return -1;
1162
1163 int NumElts = RepeatedMask.size();
1164 int Rotation = 0;
1165 int Scale = 16 / NumElts;
1166
1167 for (int i = 0; i < NumElts; ++i) {
1168 int M = RepeatedMask[i];
1169 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1170 "Unexpected mask index.");
1171 if (M < 0)
1172 continue;
1173
1174 // Determine where a rotated vector would have started.
1175 int StartIdx = i - (M % NumElts);
1176 if (StartIdx == 0)
1177 return -1;
1178
1179 // If we found the tail of a vector the rotation must be the missing
1180 // front. If we found the head of a vector, it must be how much of the
1181 // head.
1182 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1183
1184 if (Rotation == 0)
1185 Rotation = CandidateRotation;
1186 else if (Rotation != CandidateRotation)
1187 return -1;
1188
1189 // Compute which value this mask is pointing at.
1190 SDValue MaskV = M < NumElts ? V1 : V2;
1191
1192 // Compute which of the two target values this index should be assigned
1193 // to. This reflects whether the high elements are remaining or the low
1194 // elements are remaining.
1195 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1196
1197 // Either set up this value if we've not encountered it before, or check
1198 // that it remains consistent.
1199 if (!TargetV)
1200 TargetV = MaskV;
1201 else if (TargetV != MaskV)
1202 return -1;
1203 }
1204
1205 // Check that we successfully analyzed the mask, and normalize the results.
1206 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1207 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1208 if (!Lo)
1209 Lo = Hi;
1210 else if (!Hi)
1211 Hi = Lo;
1212
1213 V1 = Lo;
1214 V2 = Hi;
1215
1216 return Rotation * Scale;
1217 }
1218
1219 /// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1220 ///
1221 /// For example:
1222 /// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1223 /// <2 x i32> <i32 3, i32 0>
1224 /// is lowered to:
1225 /// (VBSRL_V $v1, $v1, 8)
1226 /// (VBSLL_V $v0, $v0, 8)
1227 /// (VOR_V $v0, $V0, $v1)
lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1228 static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
1229 ArrayRef<int> Mask, MVT VT,
1230 SDValue V1, SDValue V2,
1231 SelectionDAG &DAG) {
1232
1233 SDValue Lo = V1, Hi = V2;
1234 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1235 if (ByteRotation <= 0)
1236 return SDValue();
1237
1238 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1239 Lo = DAG.getBitcast(ByteVT, Lo);
1240 Hi = DAG.getBitcast(ByteVT, Hi);
1241
1242 int LoByteShift = 16 - ByteRotation;
1243 int HiByteShift = ByteRotation;
1244
1245 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1246 DAG.getConstant(LoByteShift, DL, MVT::i64));
1247 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1248 DAG.getConstant(HiByteShift, DL, MVT::i64));
1249 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1250 }
1251
1252 /// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1253 ///
1254 /// For example:
1255 /// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1256 /// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1257 /// %3 = bitcast <4 x i32> %2 to <2 x i64>
1258 /// is lowered to:
1259 /// (VREPLI $v1, 0)
1260 /// (VILVL $v0, $v1, $v0)
lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG,const APInt & Zeroable)1261 static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1262 ArrayRef<int> Mask, MVT VT,
1263 SDValue V1, SDValue V2,
1264 SelectionDAG &DAG,
1265 const APInt &Zeroable) {
1266 int Bits = VT.getSizeInBits();
1267 int EltBits = VT.getScalarSizeInBits();
1268 int NumElements = VT.getVectorNumElements();
1269
1270 if (Zeroable.isAllOnes())
1271 return DAG.getConstant(0, DL, VT);
1272
1273 // Define a helper function to check a particular ext-scale and lower to it if
1274 // valid.
1275 auto Lower = [&](int Scale) -> SDValue {
1276 SDValue InputV;
1277 bool AnyExt = true;
1278 int Offset = 0;
1279 for (int i = 0; i < NumElements; i++) {
1280 int M = Mask[i];
1281 if (M < 0)
1282 continue;
1283 if (i % Scale != 0) {
1284 // Each of the extended elements need to be zeroable.
1285 if (!Zeroable[i])
1286 return SDValue();
1287
1288 AnyExt = false;
1289 continue;
1290 }
1291
1292 // Each of the base elements needs to be consecutive indices into the
1293 // same input vector.
1294 SDValue V = M < NumElements ? V1 : V2;
1295 M = M % NumElements;
1296 if (!InputV) {
1297 InputV = V;
1298 Offset = M - (i / Scale);
1299
1300 // These offset can't be handled
1301 if (Offset % (NumElements / Scale))
1302 return SDValue();
1303 } else if (InputV != V)
1304 return SDValue();
1305
1306 if (M != (Offset + (i / Scale)))
1307 return SDValue(); // Non-consecutive strided elements.
1308 }
1309
1310 // If we fail to find an input, we have a zero-shuffle which should always
1311 // have already been handled.
1312 if (!InputV)
1313 return SDValue();
1314
1315 do {
1316 unsigned VilVLoHi = LoongArchISD::VILVL;
1317 if (Offset >= (NumElements / 2)) {
1318 VilVLoHi = LoongArchISD::VILVH;
1319 Offset -= (NumElements / 2);
1320 }
1321
1322 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1323 SDValue Ext =
1324 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1325 InputV = DAG.getBitcast(InputVT, InputV);
1326 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1327 Scale /= 2;
1328 EltBits *= 2;
1329 NumElements /= 2;
1330 } while (Scale > 1);
1331 return DAG.getBitcast(VT, InputV);
1332 };
1333
1334 // Each iteration, try extending the elements half as much, but into twice as
1335 // many elements.
1336 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1337 NumExtElements *= 2) {
1338 if (SDValue V = Lower(NumElements / NumExtElements))
1339 return V;
1340 }
1341 return SDValue();
1342 }
1343
1344 /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1345 ///
1346 /// VREPLVEI performs vector broadcast based on an element specified by an
1347 /// integer immediate, with its mask being similar to:
1348 /// <x, x, x, ...>
1349 /// where x is any valid index.
1350 ///
1351 /// When undef's appear in the mask they are treated as if they were whatever
1352 /// value is necessary in order to fit the above form.
lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1353 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
1354 MVT VT, SDValue V1, SDValue V2,
1355 SelectionDAG &DAG) {
1356 int SplatIndex = -1;
1357 for (const auto &M : Mask) {
1358 if (M != -1) {
1359 SplatIndex = M;
1360 break;
1361 }
1362 }
1363
1364 if (SplatIndex == -1)
1365 return DAG.getUNDEF(VT);
1366
1367 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1368 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1369 APInt Imm(64, SplatIndex);
1370 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1371 DAG.getConstant(Imm, DL, MVT::i64));
1372 }
1373
1374 return SDValue();
1375 }
1376
1377 /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1378 ///
1379 /// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1380 /// elements according to a <4 x i2> constant (encoded as an integer immediate).
1381 ///
1382 /// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1383 /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1384 /// When undef's appear they are treated as if they were whatever value is
1385 /// necessary in order to fit the above forms.
1386 ///
1387 /// For example:
1388 /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1389 /// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1390 /// i32 7, i32 6, i32 5, i32 4>
1391 /// is lowered to:
1392 /// (VSHUF4I_H $v0, $v1, 27)
1393 /// where the 27 comes from:
1394 /// 3 + (2 << 2) + (1 << 4) + (0 << 6)
lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1395 static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1396 MVT VT, SDValue V1, SDValue V2,
1397 SelectionDAG &DAG) {
1398
1399 unsigned SubVecSize = 4;
1400 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1401 SubVecSize = 2;
1402
1403 int SubMask[4] = {-1, -1, -1, -1};
1404 for (unsigned i = 0; i < SubVecSize; ++i) {
1405 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1406 int M = Mask[j];
1407
1408 // Convert from vector index to 4-element subvector index
1409 // If an index refers to an element outside of the subvector then give up
1410 if (M != -1) {
1411 M -= 4 * (j / SubVecSize);
1412 if (M < 0 || M >= 4)
1413 return SDValue();
1414 }
1415
1416 // If the mask has an undef, replace it with the current index.
1417 // Note that it might still be undef if the current index is also undef
1418 if (SubMask[i] == -1)
1419 SubMask[i] = M;
1420 // Check that non-undef values are the same as in the mask. If they
1421 // aren't then give up
1422 else if (M != -1 && M != SubMask[i])
1423 return SDValue();
1424 }
1425 }
1426
1427 // Calculate the immediate. Replace any remaining undefs with zero
1428 APInt Imm(64, 0);
1429 for (int i = SubVecSize - 1; i >= 0; --i) {
1430 int M = SubMask[i];
1431
1432 if (M == -1)
1433 M = 0;
1434
1435 Imm <<= 2;
1436 Imm |= M & 0x3;
1437 }
1438
1439 // Return vshuf4i.d
1440 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1441 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1442 DAG.getConstant(Imm, DL, MVT::i64));
1443
1444 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1445 DAG.getConstant(Imm, DL, MVT::i64));
1446 }
1447
1448 /// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1449 ///
1450 /// VPACKEV interleaves the even elements from each vector.
1451 ///
1452 /// It is possible to lower into VPACKEV when the mask consists of two of the
1453 /// following forms interleaved:
1454 /// <0, 2, 4, ...>
1455 /// <n, n+2, n+4, ...>
1456 /// where n is the number of elements in the vector.
1457 /// For example:
1458 /// <0, 0, 2, 2, 4, 4, ...>
1459 /// <0, n, 2, n+2, 4, n+4, ...>
1460 ///
1461 /// When undef's appear in the mask they are treated as if they were whatever
1462 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1463 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1464 MVT VT, SDValue V1, SDValue V2,
1465 SelectionDAG &DAG) {
1466
1467 const auto &Begin = Mask.begin();
1468 const auto &End = Mask.end();
1469 SDValue OriV1 = V1, OriV2 = V2;
1470
1471 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1472 V1 = OriV1;
1473 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1474 V1 = OriV2;
1475 else
1476 return SDValue();
1477
1478 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1479 V2 = OriV1;
1480 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1481 V2 = OriV2;
1482 else
1483 return SDValue();
1484
1485 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1486 }
1487
1488 /// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1489 ///
1490 /// VPACKOD interleaves the odd elements from each vector.
1491 ///
1492 /// It is possible to lower into VPACKOD when the mask consists of two of the
1493 /// following forms interleaved:
1494 /// <1, 3, 5, ...>
1495 /// <n+1, n+3, n+5, ...>
1496 /// where n is the number of elements in the vector.
1497 /// For example:
1498 /// <1, 1, 3, 3, 5, 5, ...>
1499 /// <1, n+1, 3, n+3, 5, n+5, ...>
1500 ///
1501 /// When undef's appear in the mask they are treated as if they were whatever
1502 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1503 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1504 MVT VT, SDValue V1, SDValue V2,
1505 SelectionDAG &DAG) {
1506
1507 const auto &Begin = Mask.begin();
1508 const auto &End = Mask.end();
1509 SDValue OriV1 = V1, OriV2 = V2;
1510
1511 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1512 V1 = OriV1;
1513 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1514 V1 = OriV2;
1515 else
1516 return SDValue();
1517
1518 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1519 V2 = OriV1;
1520 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1521 V2 = OriV2;
1522 else
1523 return SDValue();
1524
1525 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1526 }
1527
1528 /// Lower VECTOR_SHUFFLE into VILVH (if possible).
1529 ///
1530 /// VILVH interleaves consecutive elements from the left (highest-indexed) half
1531 /// of each vector.
1532 ///
1533 /// It is possible to lower into VILVH when the mask consists of two of the
1534 /// following forms interleaved:
1535 /// <x, x+1, x+2, ...>
1536 /// <n+x, n+x+1, n+x+2, ...>
1537 /// where n is the number of elements in the vector and x is half n.
1538 /// For example:
1539 /// <x, x, x+1, x+1, x+2, x+2, ...>
1540 /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1541 ///
1542 /// When undef's appear in the mask they are treated as if they were whatever
1543 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VILVH(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1544 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1545 MVT VT, SDValue V1, SDValue V2,
1546 SelectionDAG &DAG) {
1547
1548 const auto &Begin = Mask.begin();
1549 const auto &End = Mask.end();
1550 unsigned HalfSize = Mask.size() / 2;
1551 SDValue OriV1 = V1, OriV2 = V2;
1552
1553 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1554 V1 = OriV1;
1555 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1556 V1 = OriV2;
1557 else
1558 return SDValue();
1559
1560 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1561 V2 = OriV1;
1562 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1563 1))
1564 V2 = OriV2;
1565 else
1566 return SDValue();
1567
1568 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1569 }
1570
1571 /// Lower VECTOR_SHUFFLE into VILVL (if possible).
1572 ///
1573 /// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1574 /// of each vector.
1575 ///
1576 /// It is possible to lower into VILVL when the mask consists of two of the
1577 /// following forms interleaved:
1578 /// <0, 1, 2, ...>
1579 /// <n, n+1, n+2, ...>
1580 /// where n is the number of elements in the vector.
1581 /// For example:
1582 /// <0, 0, 1, 1, 2, 2, ...>
1583 /// <0, n, 1, n+1, 2, n+2, ...>
1584 ///
1585 /// When undef's appear in the mask they are treated as if they were whatever
1586 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VILVL(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1587 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1588 MVT VT, SDValue V1, SDValue V2,
1589 SelectionDAG &DAG) {
1590
1591 const auto &Begin = Mask.begin();
1592 const auto &End = Mask.end();
1593 SDValue OriV1 = V1, OriV2 = V2;
1594
1595 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1596 V1 = OriV1;
1597 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1598 V1 = OriV2;
1599 else
1600 return SDValue();
1601
1602 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1603 V2 = OriV1;
1604 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1605 V2 = OriV2;
1606 else
1607 return SDValue();
1608
1609 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1610 }
1611
1612 /// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1613 ///
1614 /// VPICKEV copies the even elements of each vector into the result vector.
1615 ///
1616 /// It is possible to lower into VPICKEV when the mask consists of two of the
1617 /// following forms concatenated:
1618 /// <0, 2, 4, ...>
1619 /// <n, n+2, n+4, ...>
1620 /// where n is the number of elements in the vector.
1621 /// For example:
1622 /// <0, 2, 4, ..., 0, 2, 4, ...>
1623 /// <0, 2, 4, ..., n, n+2, n+4, ...>
1624 ///
1625 /// When undef's appear in the mask they are treated as if they were whatever
1626 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1627 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1628 MVT VT, SDValue V1, SDValue V2,
1629 SelectionDAG &DAG) {
1630
1631 const auto &Begin = Mask.begin();
1632 const auto &Mid = Mask.begin() + Mask.size() / 2;
1633 const auto &End = Mask.end();
1634 SDValue OriV1 = V1, OriV2 = V2;
1635
1636 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1637 V1 = OriV1;
1638 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1639 V1 = OriV2;
1640 else
1641 return SDValue();
1642
1643 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1644 V2 = OriV1;
1645 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1646 V2 = OriV2;
1647
1648 else
1649 return SDValue();
1650
1651 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1652 }
1653
1654 /// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1655 ///
1656 /// VPICKOD copies the odd elements of each vector into the result vector.
1657 ///
1658 /// It is possible to lower into VPICKOD when the mask consists of two of the
1659 /// following forms concatenated:
1660 /// <1, 3, 5, ...>
1661 /// <n+1, n+3, n+5, ...>
1662 /// where n is the number of elements in the vector.
1663 /// For example:
1664 /// <1, 3, 5, ..., 1, 3, 5, ...>
1665 /// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1666 ///
1667 /// When undef's appear in the mask they are treated as if they were whatever
1668 /// value is necessary in order to fit the above forms.
lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1669 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1670 MVT VT, SDValue V1, SDValue V2,
1671 SelectionDAG &DAG) {
1672
1673 const auto &Begin = Mask.begin();
1674 const auto &Mid = Mask.begin() + Mask.size() / 2;
1675 const auto &End = Mask.end();
1676 SDValue OriV1 = V1, OriV2 = V2;
1677
1678 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1679 V1 = OriV1;
1680 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1681 V1 = OriV2;
1682 else
1683 return SDValue();
1684
1685 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1686 V2 = OriV1;
1687 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1688 V2 = OriV2;
1689 else
1690 return SDValue();
1691
1692 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1693 }
1694
1695 /// Lower VECTOR_SHUFFLE into VSHUF.
1696 ///
1697 /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1698 /// adding it as an operand to the resulting VSHUF.
lowerVECTOR_SHUFFLE_VSHUF(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1699 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1700 MVT VT, SDValue V1, SDValue V2,
1701 SelectionDAG &DAG) {
1702
1703 SmallVector<SDValue, 16> Ops;
1704 for (auto M : Mask)
1705 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1706
1707 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1708 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1709
1710 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
1711 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1712 // VSHF concatenates the vectors in a bitwise fashion:
1713 // <0b00, 0b01> + <0b10, 0b11> ->
1714 // 0b0100 + 0b1110 -> 0b01001110
1715 // <0b10, 0b11, 0b00, 0b01>
1716 // We must therefore swap the operands to get the correct result.
1717 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1718 }
1719
1720 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1721 ///
1722 /// This routine breaks down the specific type of 128-bit shuffle and
1723 /// dispatches to the lowering routines accordingly.
lower128BitShuffle(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1724 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1725 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1726 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1727 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1728 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1729 "Vector type is unsupported for lsx!");
1730 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1731 "Two operands have different types!");
1732 assert(VT.getVectorNumElements() == Mask.size() &&
1733 "Unexpected mask size for shuffle!");
1734 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1735
1736 APInt KnownUndef, KnownZero;
1737 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1738 APInt Zeroable = KnownUndef | KnownZero;
1739
1740 SDValue Result;
1741 // TODO: Add more comparison patterns.
1742 if (V2.isUndef()) {
1743 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
1744 return Result;
1745 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1746 return Result;
1747
1748 // TODO: This comment may be enabled in the future to better match the
1749 // pattern for instruction selection.
1750 /* V2 = V1; */
1751 }
1752
1753 // It is recommended not to change the pattern comparison order for better
1754 // performance.
1755 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1756 return Result;
1757 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1758 return Result;
1759 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1760 return Result;
1761 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1762 return Result;
1763 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1764 return Result;
1765 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1766 return Result;
1767 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1768 (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1769 return Result;
1770 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1771 Zeroable)))
1772 return Result;
1773 if ((Result =
1774 lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
1775 return Result;
1776 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
1777 return Result;
1778 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1779 return NewShuffle;
1780 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1781 return Result;
1782 return SDValue();
1783 }
1784
1785 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1786 ///
1787 /// It is a XVREPLVEI when the mask is:
1788 /// <x, x, x, ..., x+n, x+n, x+n, ...>
1789 /// where the number of x is equal to n and n is half the length of vector.
1790 ///
1791 /// When undef's appear in the mask they are treated as if they were whatever
1792 /// value is necessary in order to fit the above form.
lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1793 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
1794 ArrayRef<int> Mask, MVT VT,
1795 SDValue V1, SDValue V2,
1796 SelectionDAG &DAG) {
1797 int SplatIndex = -1;
1798 for (const auto &M : Mask) {
1799 if (M != -1) {
1800 SplatIndex = M;
1801 break;
1802 }
1803 }
1804
1805 if (SplatIndex == -1)
1806 return DAG.getUNDEF(VT);
1807
1808 const auto &Begin = Mask.begin();
1809 const auto &End = Mask.end();
1810 unsigned HalfSize = Mask.size() / 2;
1811
1812 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1813 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1814 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1815 0)) {
1816 APInt Imm(64, SplatIndex);
1817 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1818 DAG.getConstant(Imm, DL, MVT::i64));
1819 }
1820
1821 return SDValue();
1822 }
1823
1824 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1825 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1826 MVT VT, SDValue V1, SDValue V2,
1827 SelectionDAG &DAG) {
1828 // When the size is less than or equal to 4, lower cost instructions may be
1829 // used.
1830 if (Mask.size() <= 4)
1831 return SDValue();
1832 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
1833 }
1834
1835 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1836 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1837 MVT VT, SDValue V1, SDValue V2,
1838 SelectionDAG &DAG) {
1839 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
1840 }
1841
1842 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1843 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1844 MVT VT, SDValue V1, SDValue V2,
1845 SelectionDAG &DAG) {
1846 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1847 }
1848
1849 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
lowerVECTOR_SHUFFLE_XVILVH(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1850 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
1851 MVT VT, SDValue V1, SDValue V2,
1852 SelectionDAG &DAG) {
1853
1854 const auto &Begin = Mask.begin();
1855 const auto &End = Mask.end();
1856 unsigned HalfSize = Mask.size() / 2;
1857 unsigned LeftSize = HalfSize / 2;
1858 SDValue OriV1 = V1, OriV2 = V2;
1859
1860 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1861 1) &&
1862 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1863 V1 = OriV1;
1864 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1865 Mask.size() + HalfSize - LeftSize, 1) &&
1866 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1867 Mask.size() + HalfSize + LeftSize, 1))
1868 V1 = OriV2;
1869 else
1870 return SDValue();
1871
1872 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1873 1) &&
1874 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1875 1))
1876 V2 = OriV1;
1877 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1878 Mask.size() + HalfSize - LeftSize, 1) &&
1879 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1880 Mask.size() + HalfSize + LeftSize, 1))
1881 V2 = OriV2;
1882 else
1883 return SDValue();
1884
1885 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1886 }
1887
1888 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
lowerVECTOR_SHUFFLE_XVILVL(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1889 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
1890 MVT VT, SDValue V1, SDValue V2,
1891 SelectionDAG &DAG) {
1892
1893 const auto &Begin = Mask.begin();
1894 const auto &End = Mask.end();
1895 unsigned HalfSize = Mask.size() / 2;
1896 SDValue OriV1 = V1, OriV2 = V2;
1897
1898 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1899 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1900 V1 = OriV1;
1901 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1902 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1903 Mask.size() + HalfSize, 1))
1904 V1 = OriV2;
1905 else
1906 return SDValue();
1907
1908 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1909 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1910 V2 = OriV1;
1911 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1912 1) &&
1913 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1914 Mask.size() + HalfSize, 1))
1915 V2 = OriV2;
1916 else
1917 return SDValue();
1918
1919 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1920 }
1921
1922 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1923 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1924 MVT VT, SDValue V1, SDValue V2,
1925 SelectionDAG &DAG) {
1926
1927 const auto &Begin = Mask.begin();
1928 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1929 const auto &Mid = Mask.begin() + Mask.size() / 2;
1930 const auto &RightMid = Mask.end() - Mask.size() / 4;
1931 const auto &End = Mask.end();
1932 unsigned HalfSize = Mask.size() / 2;
1933 SDValue OriV1 = V1, OriV2 = V2;
1934
1935 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1936 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1937 V1 = OriV1;
1938 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1939 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1940 V1 = OriV2;
1941 else
1942 return SDValue();
1943
1944 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1945 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1946 V2 = OriV1;
1947 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1948 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1949 V2 = OriV2;
1950
1951 else
1952 return SDValue();
1953
1954 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1955 }
1956
1957 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1958 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1959 MVT VT, SDValue V1, SDValue V2,
1960 SelectionDAG &DAG) {
1961
1962 const auto &Begin = Mask.begin();
1963 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1964 const auto &Mid = Mask.begin() + Mask.size() / 2;
1965 const auto &RightMid = Mask.end() - Mask.size() / 4;
1966 const auto &End = Mask.end();
1967 unsigned HalfSize = Mask.size() / 2;
1968 SDValue OriV1 = V1, OriV2 = V2;
1969
1970 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1971 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1972 V1 = OriV1;
1973 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1974 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1975 2))
1976 V1 = OriV2;
1977 else
1978 return SDValue();
1979
1980 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1981 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1982 V2 = OriV1;
1983 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1984 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1985 2))
1986 V2 = OriV2;
1987 else
1988 return SDValue();
1989
1990 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1991 }
1992
1993 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)1994 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1995 MVT VT, SDValue V1, SDValue V2,
1996 SelectionDAG &DAG) {
1997
1998 int MaskSize = Mask.size();
1999 int HalfSize = Mask.size() / 2;
2000 const auto &Begin = Mask.begin();
2001 const auto &Mid = Mask.begin() + HalfSize;
2002 const auto &End = Mask.end();
2003
2004 // VECTOR_SHUFFLE concatenates the vectors:
2005 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2006 // shuffling ->
2007 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2008 //
2009 // XVSHUF concatenates the vectors:
2010 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2011 // shuffling ->
2012 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2013 SmallVector<SDValue, 8> MaskAlloc;
2014 for (auto it = Begin; it < Mid; it++) {
2015 if (*it < 0) // UNDEF
2016 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2017 else if ((*it >= 0 && *it < HalfSize) ||
2018 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2019 int M = *it < HalfSize ? *it : *it - HalfSize;
2020 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2021 } else
2022 return SDValue();
2023 }
2024 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2025
2026 for (auto it = Mid; it < End; it++) {
2027 if (*it < 0) // UNDEF
2028 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2029 else if ((*it >= HalfSize && *it < MaskSize) ||
2030 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2031 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2032 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2033 } else
2034 return SDValue();
2035 }
2036 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2037
2038 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2039 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2040 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2041 }
2042
2043 /// Shuffle vectors by lane to generate more optimized instructions.
2044 /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2045 ///
2046 /// Therefore, except for the following four cases, other cases are regarded
2047 /// as cross-lane shuffles, where optimization is relatively limited.
2048 ///
2049 /// - Shuffle high, low lanes of two inputs vector
2050 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2051 /// - Shuffle low, high lanes of two inputs vector
2052 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2053 /// - Shuffle low, low lanes of two inputs vector
2054 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2055 /// - Shuffle high, high lanes of two inputs vector
2056 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2057 ///
2058 /// The first case is the closest to LoongArch instructions and the other
2059 /// cases need to be converted to it for processing.
2060 ///
2061 /// This function may modify V1, V2 and Mask
canonicalizeShuffleVectorByLane(const SDLoc & DL,MutableArrayRef<int> Mask,MVT VT,SDValue & V1,SDValue & V2,SelectionDAG & DAG)2062 static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
2063 MutableArrayRef<int> Mask, MVT VT,
2064 SDValue &V1, SDValue &V2,
2065 SelectionDAG &DAG) {
2066
2067 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2068
2069 int MaskSize = Mask.size();
2070 int HalfSize = Mask.size() / 2;
2071
2072 HalfMaskType preMask = None, postMask = None;
2073
2074 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2075 return M < 0 || (M >= 0 && M < HalfSize) ||
2076 (M >= MaskSize && M < MaskSize + HalfSize);
2077 }))
2078 preMask = HighLaneTy;
2079 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2080 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2081 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2082 }))
2083 preMask = LowLaneTy;
2084
2085 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2086 return M < 0 || (M >= 0 && M < HalfSize) ||
2087 (M >= MaskSize && M < MaskSize + HalfSize);
2088 }))
2089 postMask = HighLaneTy;
2090 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2091 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2092 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2093 }))
2094 postMask = LowLaneTy;
2095
2096 // The pre-half of mask is high lane type, and the post-half of mask
2097 // is low lane type, which is closest to the LoongArch instructions.
2098 //
2099 // Note: In the LoongArch architecture, the high lane of mask corresponds
2100 // to the lower 128-bit of vector register, and the low lane of mask
2101 // corresponds the higher 128-bit of vector register.
2102 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2103 return;
2104 }
2105 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2106 V1 = DAG.getBitcast(MVT::v4i64, V1);
2107 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2108 DAG.getConstant(0b01001110, DL, MVT::i64));
2109 V1 = DAG.getBitcast(VT, V1);
2110
2111 if (!V2.isUndef()) {
2112 V2 = DAG.getBitcast(MVT::v4i64, V2);
2113 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2114 DAG.getConstant(0b01001110, DL, MVT::i64));
2115 V2 = DAG.getBitcast(VT, V2);
2116 }
2117
2118 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2119 *it = *it < 0 ? *it : *it - HalfSize;
2120 }
2121 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2122 *it = *it < 0 ? *it : *it + HalfSize;
2123 }
2124 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2125 V1 = DAG.getBitcast(MVT::v4i64, V1);
2126 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2127 DAG.getConstant(0b11101110, DL, MVT::i64));
2128 V1 = DAG.getBitcast(VT, V1);
2129
2130 if (!V2.isUndef()) {
2131 V2 = DAG.getBitcast(MVT::v4i64, V2);
2132 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2133 DAG.getConstant(0b11101110, DL, MVT::i64));
2134 V2 = DAG.getBitcast(VT, V2);
2135 }
2136
2137 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2138 *it = *it < 0 ? *it : *it - HalfSize;
2139 }
2140 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2141 V1 = DAG.getBitcast(MVT::v4i64, V1);
2142 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2143 DAG.getConstant(0b01000100, DL, MVT::i64));
2144 V1 = DAG.getBitcast(VT, V1);
2145
2146 if (!V2.isUndef()) {
2147 V2 = DAG.getBitcast(MVT::v4i64, V2);
2148 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2149 DAG.getConstant(0b01000100, DL, MVT::i64));
2150 V2 = DAG.getBitcast(VT, V2);
2151 }
2152
2153 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2154 *it = *it < 0 ? *it : *it + HalfSize;
2155 }
2156 } else { // cross-lane
2157 return;
2158 }
2159 }
2160
2161 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2162 /// Only for 256-bit vector.
2163 ///
2164 /// For example:
2165 /// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2166 /// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2167 /// is lowerded to:
2168 /// (XVPERMI $xr2, $xr0, 78)
2169 /// (XVSHUF $xr1, $xr2, $xr0)
2170 /// (XVORI $xr0, $xr1, 0)
lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)2171 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2172 ArrayRef<int> Mask,
2173 MVT VT, SDValue V1,
2174 SDValue V2,
2175 SelectionDAG &DAG) {
2176 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2177 int Size = Mask.size();
2178 int LaneSize = Size / 2;
2179
2180 bool LaneCrossing[2] = {false, false};
2181 for (int i = 0; i < Size; ++i)
2182 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2183 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2184
2185 // Ensure that all lanes ared involved.
2186 if (!LaneCrossing[0] && !LaneCrossing[1])
2187 return SDValue();
2188
2189 SmallVector<int> InLaneMask;
2190 InLaneMask.assign(Mask.begin(), Mask.end());
2191 for (int i = 0; i < Size; ++i) {
2192 int &M = InLaneMask[i];
2193 if (M < 0)
2194 continue;
2195 if (((M % Size) / LaneSize) != (i / LaneSize))
2196 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2197 }
2198
2199 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2200 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2201 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2202 Flipped = DAG.getBitcast(VT, Flipped);
2203 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2204 }
2205
2206 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2207 ///
2208 /// This routine breaks down the specific type of 256-bit shuffle and
2209 /// dispatches to the lowering routines accordingly.
lower256BitShuffle(const SDLoc & DL,ArrayRef<int> Mask,MVT VT,SDValue V1,SDValue V2,SelectionDAG & DAG)2210 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2211 SDValue V1, SDValue V2, SelectionDAG &DAG) {
2212 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2213 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2214 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2215 "Vector type is unsupported for lasx!");
2216 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2217 "Two operands have different types!");
2218 assert(VT.getVectorNumElements() == Mask.size() &&
2219 "Unexpected mask size for shuffle!");
2220 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2221 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2222
2223 // canonicalize non cross-lane shuffle vector
2224 SmallVector<int> NewMask(Mask);
2225 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
2226
2227 APInt KnownUndef, KnownZero;
2228 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2229 APInt Zeroable = KnownUndef | KnownZero;
2230
2231 SDValue Result;
2232 // TODO: Add more comparison patterns.
2233 if (V2.isUndef()) {
2234 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
2235 return Result;
2236 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
2237 return Result;
2238 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2239 V1, V2, DAG)))
2240 return Result;
2241
2242 // TODO: This comment may be enabled in the future to better match the
2243 // pattern for instruction selection.
2244 /* V2 = V1; */
2245 }
2246
2247 // It is recommended not to change the pattern comparison order for better
2248 // performance.
2249 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2250 return Result;
2251 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2252 return Result;
2253 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2254 return Result;
2255 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2256 return Result;
2257 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2258 return Result;
2259 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2260 return Result;
2261 if ((Result =
2262 lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
2263 return Result;
2264 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG)))
2265 return Result;
2266 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2267 return NewShuffle;
2268 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2269 return Result;
2270
2271 return SDValue();
2272 }
2273
lowerVECTOR_SHUFFLE(SDValue Op,SelectionDAG & DAG) const2274 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2275 SelectionDAG &DAG) const {
2276 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2277 ArrayRef<int> OrigMask = SVOp->getMask();
2278 SDValue V1 = Op.getOperand(0);
2279 SDValue V2 = Op.getOperand(1);
2280 MVT VT = Op.getSimpleValueType();
2281 int NumElements = VT.getVectorNumElements();
2282 SDLoc DL(Op);
2283
2284 bool V1IsUndef = V1.isUndef();
2285 bool V2IsUndef = V2.isUndef();
2286 if (V1IsUndef && V2IsUndef)
2287 return DAG.getUNDEF(VT);
2288
2289 // When we create a shuffle node we put the UNDEF node to second operand,
2290 // but in some cases the first operand may be transformed to UNDEF.
2291 // In this case we should just commute the node.
2292 if (V1IsUndef)
2293 return DAG.getCommutedVectorShuffle(*SVOp);
2294
2295 // Check for non-undef masks pointing at an undef vector and make the masks
2296 // undef as well. This makes it easier to match the shuffle based solely on
2297 // the mask.
2298 if (V2IsUndef &&
2299 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2300 SmallVector<int, 8> NewMask(OrigMask);
2301 for (int &M : NewMask)
2302 if (M >= NumElements)
2303 M = -1;
2304 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2305 }
2306
2307 // Check for illegal shuffle mask element index values.
2308 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2309 (void)MaskUpperLimit;
2310 assert(llvm::all_of(OrigMask,
2311 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2312 "Out of bounds shuffle index");
2313
2314 // For each vector width, delegate to a specialized lowering routine.
2315 if (VT.is128BitVector())
2316 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
2317
2318 if (VT.is256BitVector())
2319 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
2320
2321 return SDValue();
2322 }
2323
lowerFP_TO_FP16(SDValue Op,SelectionDAG & DAG) const2324 SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2325 SelectionDAG &DAG) const {
2326 // Custom lower to ensure the libcall return is passed in an FPR on hard
2327 // float ABIs.
2328 SDLoc DL(Op);
2329 MakeLibCallOptions CallOptions;
2330 SDValue Op0 = Op.getOperand(0);
2331 SDValue Chain = SDValue();
2332 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2333 SDValue Res;
2334 std::tie(Res, Chain) =
2335 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2336 if (Subtarget.is64Bit())
2337 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2338 return DAG.getBitcast(MVT::i32, Res);
2339 }
2340
lowerFP16_TO_FP(SDValue Op,SelectionDAG & DAG) const2341 SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2342 SelectionDAG &DAG) const {
2343 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2344 // float ABIs.
2345 SDLoc DL(Op);
2346 MakeLibCallOptions CallOptions;
2347 SDValue Op0 = Op.getOperand(0);
2348 SDValue Chain = SDValue();
2349 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2350 DL, MVT::f32, Op0)
2351 : DAG.getBitcast(MVT::f32, Op0);
2352 SDValue Res;
2353 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2354 CallOptions, DL, Chain);
2355 return Res;
2356 }
2357
lowerFP_TO_BF16(SDValue Op,SelectionDAG & DAG) const2358 SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2359 SelectionDAG &DAG) const {
2360 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2361 SDLoc DL(Op);
2362 MakeLibCallOptions CallOptions;
2363 RTLIB::Libcall LC =
2364 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2365 SDValue Res =
2366 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2367 if (Subtarget.is64Bit())
2368 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2369 return DAG.getBitcast(MVT::i32, Res);
2370 }
2371
lowerBF16_TO_FP(SDValue Op,SelectionDAG & DAG) const2372 SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2373 SelectionDAG &DAG) const {
2374 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2375 MVT VT = Op.getSimpleValueType();
2376 SDLoc DL(Op);
2377 Op = DAG.getNode(
2378 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2379 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2380 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2381 DL, MVT::f32, Op)
2382 : DAG.getBitcast(MVT::f32, Op);
2383 if (VT != MVT::f32)
2384 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2385 return Res;
2386 }
2387
isConstantOrUndef(const SDValue Op)2388 static bool isConstantOrUndef(const SDValue Op) {
2389 if (Op->isUndef())
2390 return true;
2391 if (isa<ConstantSDNode>(Op))
2392 return true;
2393 if (isa<ConstantFPSDNode>(Op))
2394 return true;
2395 return false;
2396 }
2397
isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode * Op)2398 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2399 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2400 if (isConstantOrUndef(Op->getOperand(i)))
2401 return true;
2402 return false;
2403 }
2404
2405 // Lower BUILD_VECTOR as broadcast load (if possible).
2406 // For example:
2407 // %a = load i8, ptr %ptr
2408 // %b = build_vector %a, %a, %a, %a
2409 // is lowered to :
2410 // (VLDREPL_B $a0, 0)
lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode * BVOp,const SDLoc & DL,SelectionDAG & DAG)2411 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2412 const SDLoc &DL,
2413 SelectionDAG &DAG) {
2414 MVT VT = BVOp->getSimpleValueType(0);
2415 int NumOps = BVOp->getNumOperands();
2416
2417 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2418 "Unsupported vector type for broadcast.");
2419
2420 SDValue IdentitySrc;
2421 bool IsIdeneity = true;
2422
2423 for (int i = 0; i != NumOps; i++) {
2424 SDValue Op = BVOp->getOperand(i);
2425 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2426 IsIdeneity = false;
2427 break;
2428 }
2429 IdentitySrc = BVOp->getOperand(0);
2430 }
2431
2432 // make sure that this load is valid and only has one user.
2433 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2434 return SDValue();
2435
2436 auto *LN = cast<LoadSDNode>(IdentitySrc);
2437 auto ExtType = LN->getExtensionType();
2438
2439 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2440 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2441 SDVTList Tys =
2442 LN->isIndexed()
2443 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2444 : DAG.getVTList(VT, MVT::Other);
2445 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2446 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2447 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2448 return BCast;
2449 }
2450 return SDValue();
2451 }
2452
lowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const2453 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2454 SelectionDAG &DAG) const {
2455 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2456 EVT ResTy = Op->getValueType(0);
2457 SDLoc DL(Op);
2458 APInt SplatValue, SplatUndef;
2459 unsigned SplatBitSize;
2460 bool HasAnyUndefs;
2461 bool Is128Vec = ResTy.is128BitVector();
2462 bool Is256Vec = ResTy.is256BitVector();
2463
2464 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2465 (!Subtarget.hasExtLASX() || !Is256Vec))
2466 return SDValue();
2467
2468 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2469 return Result;
2470
2471 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2472 /*MinSplatBits=*/8) &&
2473 SplatBitSize <= 64) {
2474 // We can only cope with 8, 16, 32, or 64-bit elements.
2475 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2476 SplatBitSize != 64)
2477 return SDValue();
2478
2479 EVT ViaVecTy;
2480
2481 switch (SplatBitSize) {
2482 default:
2483 return SDValue();
2484 case 8:
2485 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2486 break;
2487 case 16:
2488 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2489 break;
2490 case 32:
2491 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2492 break;
2493 case 64:
2494 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2495 break;
2496 }
2497
2498 // SelectionDAG::getConstant will promote SplatValue appropriately.
2499 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2500
2501 // Bitcast to the type we originally wanted.
2502 if (ViaVecTy != ResTy)
2503 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2504
2505 return Result;
2506 }
2507
2508 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2509 return Op;
2510
2511 if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2512 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2513 // The resulting code is the same length as the expansion, but it doesn't
2514 // use memory operations.
2515 EVT ResTy = Node->getValueType(0);
2516
2517 assert(ResTy.isVector());
2518
2519 unsigned NumElts = ResTy.getVectorNumElements();
2520 SDValue Vector = DAG.getUNDEF(ResTy);
2521 for (unsigned i = 0; i < NumElts; ++i) {
2522 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2523 Node->getOperand(i),
2524 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2525 }
2526 return Vector;
2527 }
2528
2529 return SDValue();
2530 }
2531
lowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const2532 SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2533 SelectionDAG &DAG) const {
2534 SDLoc DL(Op);
2535 MVT ResVT = Op.getSimpleValueType();
2536 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2537
2538 unsigned NumOperands = Op.getNumOperands();
2539 unsigned NumFreezeUndef = 0;
2540 unsigned NumZero = 0;
2541 unsigned NumNonZero = 0;
2542 unsigned NonZeros = 0;
2543 SmallSet<SDValue, 4> Undefs;
2544 for (unsigned i = 0; i != NumOperands; ++i) {
2545 SDValue SubVec = Op.getOperand(i);
2546 if (SubVec.isUndef())
2547 continue;
2548 if (ISD::isFreezeUndef(SubVec.getNode())) {
2549 // If the freeze(undef) has multiple uses then we must fold to zero.
2550 if (SubVec.hasOneUse()) {
2551 ++NumFreezeUndef;
2552 } else {
2553 ++NumZero;
2554 Undefs.insert(SubVec);
2555 }
2556 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2557 ++NumZero;
2558 else {
2559 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2560 NonZeros |= 1 << i;
2561 ++NumNonZero;
2562 }
2563 }
2564
2565 // If we have more than 2 non-zeros, build each half separately.
2566 if (NumNonZero > 2) {
2567 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2568 ArrayRef<SDUse> Ops = Op->ops();
2569 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2570 Ops.slice(0, NumOperands / 2));
2571 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2572 Ops.slice(NumOperands / 2));
2573 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2574 }
2575
2576 // Otherwise, build it up through insert_subvectors.
2577 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2578 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2579 : DAG.getUNDEF(ResVT));
2580
2581 // Replace Undef operands with ZeroVector.
2582 for (SDValue U : Undefs)
2583 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2584
2585 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2586 unsigned NumSubElems = SubVT.getVectorNumElements();
2587 for (unsigned i = 0; i != NumOperands; ++i) {
2588 if ((NonZeros & (1 << i)) == 0)
2589 continue;
2590
2591 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2592 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2593 }
2594
2595 return Vec;
2596 }
2597
2598 SDValue
lowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const2599 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2600 SelectionDAG &DAG) const {
2601 EVT VecTy = Op->getOperand(0)->getValueType(0);
2602 SDValue Idx = Op->getOperand(1);
2603 EVT EltTy = VecTy.getVectorElementType();
2604 unsigned NumElts = VecTy.getVectorNumElements();
2605
2606 if (isa<ConstantSDNode>(Idx) &&
2607 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
2608 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
2609 return Op;
2610
2611 return SDValue();
2612 }
2613
2614 SDValue
lowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const2615 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2616 SelectionDAG &DAG) const {
2617 if (isa<ConstantSDNode>(Op->getOperand(2)))
2618 return Op;
2619 return SDValue();
2620 }
2621
lowerATOMIC_FENCE(SDValue Op,SelectionDAG & DAG) const2622 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2623 SelectionDAG &DAG) const {
2624 SDLoc DL(Op);
2625 SyncScope::ID FenceSSID =
2626 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2627
2628 // singlethread fences only synchronize with signal handlers on the same
2629 // thread and thus only need to preserve instruction order, not actually
2630 // enforce memory ordering.
2631 if (FenceSSID == SyncScope::SingleThread)
2632 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2633 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2634
2635 return Op;
2636 }
2637
lowerWRITE_REGISTER(SDValue Op,SelectionDAG & DAG) const2638 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2639 SelectionDAG &DAG) const {
2640
2641 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2642 DAG.getContext()->emitError(
2643 "On LA64, only 64-bit registers can be written.");
2644 return Op.getOperand(0);
2645 }
2646
2647 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2648 DAG.getContext()->emitError(
2649 "On LA32, only 32-bit registers can be written.");
2650 return Op.getOperand(0);
2651 }
2652
2653 return Op;
2654 }
2655
lowerFRAMEADDR(SDValue Op,SelectionDAG & DAG) const2656 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2657 SelectionDAG &DAG) const {
2658 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2659 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2660 "be a constant integer");
2661 return SDValue();
2662 }
2663
2664 MachineFunction &MF = DAG.getMachineFunction();
2665 MF.getFrameInfo().setFrameAddressIsTaken(true);
2666 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2667 EVT VT = Op.getValueType();
2668 SDLoc DL(Op);
2669 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2670 unsigned Depth = Op.getConstantOperandVal(0);
2671 int GRLenInBytes = Subtarget.getGRLen() / 8;
2672
2673 while (Depth--) {
2674 int Offset = -(GRLenInBytes * 2);
2675 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2676 DAG.getSignedConstant(Offset, DL, VT));
2677 FrameAddr =
2678 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2679 }
2680 return FrameAddr;
2681 }
2682
lowerRETURNADDR(SDValue Op,SelectionDAG & DAG) const2683 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2684 SelectionDAG &DAG) const {
2685 // Currently only support lowering return address for current frame.
2686 if (Op.getConstantOperandVal(0) != 0) {
2687 DAG.getContext()->emitError(
2688 "return address can only be determined for the current frame");
2689 return SDValue();
2690 }
2691
2692 MachineFunction &MF = DAG.getMachineFunction();
2693 MF.getFrameInfo().setReturnAddressIsTaken(true);
2694 MVT GRLenVT = Subtarget.getGRLenVT();
2695
2696 // Return the value of the return address register, marking it an implicit
2697 // live-in.
2698 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
2699 getRegClassFor(GRLenVT));
2700 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
2701 }
2702
lowerEH_DWARF_CFA(SDValue Op,SelectionDAG & DAG) const2703 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
2704 SelectionDAG &DAG) const {
2705 MachineFunction &MF = DAG.getMachineFunction();
2706 auto Size = Subtarget.getGRLen() / 8;
2707 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
2708 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2709 }
2710
lowerVASTART(SDValue Op,SelectionDAG & DAG) const2711 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
2712 SelectionDAG &DAG) const {
2713 MachineFunction &MF = DAG.getMachineFunction();
2714 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
2715
2716 SDLoc DL(Op);
2717 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2718 getPointerTy(MF.getDataLayout()));
2719
2720 // vastart just stores the address of the VarArgsFrameIndex slot into the
2721 // memory location argument.
2722 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2723 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2724 MachinePointerInfo(SV));
2725 }
2726
lowerUINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2727 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
2728 SelectionDAG &DAG) const {
2729 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2730 !Subtarget.hasBasicD() && "unexpected target features");
2731
2732 SDLoc DL(Op);
2733 SDValue Op0 = Op.getOperand(0);
2734 if (Op0->getOpcode() == ISD::AND) {
2735 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
2736 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
2737 return Op;
2738 }
2739
2740 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
2741 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
2742 Op0.getConstantOperandVal(2) == UINT64_C(0))
2743 return Op;
2744
2745 if (Op0.getOpcode() == ISD::AssertZext &&
2746 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
2747 return Op;
2748
2749 EVT OpVT = Op0.getValueType();
2750 EVT RetVT = Op.getValueType();
2751 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
2752 MakeLibCallOptions CallOptions;
2753 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
2754 SDValue Chain = SDValue();
2755 SDValue Result;
2756 std::tie(Result, Chain) =
2757 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2758 return Result;
2759 }
2760
lowerSINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2761 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
2762 SelectionDAG &DAG) const {
2763 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2764 !Subtarget.hasBasicD() && "unexpected target features");
2765
2766 SDLoc DL(Op);
2767 SDValue Op0 = Op.getOperand(0);
2768
2769 if ((Op0.getOpcode() == ISD::AssertSext ||
2770 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
2771 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
2772 return Op;
2773
2774 EVT OpVT = Op0.getValueType();
2775 EVT RetVT = Op.getValueType();
2776 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
2777 MakeLibCallOptions CallOptions;
2778 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
2779 SDValue Chain = SDValue();
2780 SDValue Result;
2781 std::tie(Result, Chain) =
2782 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2783 return Result;
2784 }
2785
lowerBITCAST(SDValue Op,SelectionDAG & DAG) const2786 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
2787 SelectionDAG &DAG) const {
2788
2789 SDLoc DL(Op);
2790 EVT VT = Op.getValueType();
2791 SDValue Op0 = Op.getOperand(0);
2792 EVT Op0VT = Op0.getValueType();
2793
2794 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
2795 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
2796 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2797 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
2798 }
2799 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
2800 SDValue Lo, Hi;
2801 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
2802 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
2803 }
2804 return Op;
2805 }
2806
lowerFP_TO_SINT(SDValue Op,SelectionDAG & DAG) const2807 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
2808 SelectionDAG &DAG) const {
2809
2810 SDLoc DL(Op);
2811 SDValue Op0 = Op.getOperand(0);
2812
2813 if (Op0.getValueType() == MVT::f16)
2814 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
2815
2816 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
2817 !Subtarget.hasBasicD()) {
2818 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
2819 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
2820 }
2821
2822 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
2823 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
2824 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
2825 }
2826
getTargetNode(GlobalAddressSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2827 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2828 SelectionDAG &DAG, unsigned Flags) {
2829 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2830 }
2831
getTargetNode(BlockAddressSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2832 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2833 SelectionDAG &DAG, unsigned Flags) {
2834 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2835 Flags);
2836 }
2837
getTargetNode(ConstantPoolSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2838 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2839 SelectionDAG &DAG, unsigned Flags) {
2840 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2841 N->getOffset(), Flags);
2842 }
2843
getTargetNode(JumpTableSDNode * N,SDLoc DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)2844 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2845 SelectionDAG &DAG, unsigned Flags) {
2846 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2847 }
2848
2849 template <class NodeTy>
getAddr(NodeTy * N,SelectionDAG & DAG,CodeModel::Model M,bool IsLocal) const2850 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2851 CodeModel::Model M,
2852 bool IsLocal) const {
2853 SDLoc DL(N);
2854 EVT Ty = getPointerTy(DAG.getDataLayout());
2855 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2856 SDValue Load;
2857
2858 switch (M) {
2859 default:
2860 report_fatal_error("Unsupported code model");
2861
2862 case CodeModel::Large: {
2863 assert(Subtarget.is64Bit() && "Large code model requires LA64");
2864
2865 // This is not actually used, but is necessary for successfully matching
2866 // the PseudoLA_*_LARGE nodes.
2867 SDValue Tmp = DAG.getConstant(0, DL, Ty);
2868 if (IsLocal) {
2869 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
2870 // eventually becomes the desired 5-insn code sequence.
2871 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
2872 Tmp, Addr),
2873 0);
2874 } else {
2875 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
2876 // eventually becomes the desired 5-insn code sequence.
2877 Load = SDValue(
2878 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
2879 0);
2880 }
2881 break;
2882 }
2883
2884 case CodeModel::Small:
2885 case CodeModel::Medium:
2886 if (IsLocal) {
2887 // This generates the pattern (PseudoLA_PCREL sym), which expands to
2888 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
2889 Load = SDValue(
2890 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
2891 } else {
2892 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
2893 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
2894 Load =
2895 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
2896 }
2897 }
2898
2899 if (!IsLocal) {
2900 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
2901 MachineFunction &MF = DAG.getMachineFunction();
2902 MachineMemOperand *MemOp = MF.getMachineMemOperand(
2903 MachinePointerInfo::getGOT(MF),
2904 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
2905 MachineMemOperand::MOInvariant,
2906 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
2907 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
2908 }
2909
2910 return Load;
2911 }
2912
lowerBlockAddress(SDValue Op,SelectionDAG & DAG) const2913 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
2914 SelectionDAG &DAG) const {
2915 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
2916 DAG.getTarget().getCodeModel());
2917 }
2918
lowerJumpTable(SDValue Op,SelectionDAG & DAG) const2919 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
2920 SelectionDAG &DAG) const {
2921 return getAddr(cast<JumpTableSDNode>(Op), DAG,
2922 DAG.getTarget().getCodeModel());
2923 }
2924
lowerConstantPool(SDValue Op,SelectionDAG & DAG) const2925 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
2926 SelectionDAG &DAG) const {
2927 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
2928 DAG.getTarget().getCodeModel());
2929 }
2930
lowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const2931 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
2932 SelectionDAG &DAG) const {
2933 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2934 assert(N->getOffset() == 0 && "unexpected offset in global node");
2935 auto CM = DAG.getTarget().getCodeModel();
2936 const GlobalValue *GV = N->getGlobal();
2937
2938 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
2939 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
2940 CM = *GCM;
2941 }
2942
2943 return getAddr(N, DAG, CM, GV->isDSOLocal());
2944 }
2945
getStaticTLSAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc,bool UseGOT,bool Large) const2946 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
2947 SelectionDAG &DAG,
2948 unsigned Opc, bool UseGOT,
2949 bool Large) const {
2950 SDLoc DL(N);
2951 EVT Ty = getPointerTy(DAG.getDataLayout());
2952 MVT GRLenVT = Subtarget.getGRLenVT();
2953
2954 // This is not actually used, but is necessary for successfully matching the
2955 // PseudoLA_*_LARGE nodes.
2956 SDValue Tmp = DAG.getConstant(0, DL, Ty);
2957 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
2958
2959 // Only IE needs an extra argument for large code model.
2960 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
2961 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
2962 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
2963
2964 // If it is LE for normal/medium code model, the add tp operation will occur
2965 // during the pseudo-instruction expansion.
2966 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
2967 return Offset;
2968
2969 if (UseGOT) {
2970 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
2971 MachineFunction &MF = DAG.getMachineFunction();
2972 MachineMemOperand *MemOp = MF.getMachineMemOperand(
2973 MachinePointerInfo::getGOT(MF),
2974 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
2975 MachineMemOperand::MOInvariant,
2976 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
2977 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
2978 }
2979
2980 // Add the thread pointer.
2981 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
2982 DAG.getRegister(LoongArch::R2, GRLenVT));
2983 }
2984
getDynamicTLSAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc,bool Large) const2985 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
2986 SelectionDAG &DAG,
2987 unsigned Opc,
2988 bool Large) const {
2989 SDLoc DL(N);
2990 EVT Ty = getPointerTy(DAG.getDataLayout());
2991 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
2992
2993 // This is not actually used, but is necessary for successfully matching the
2994 // PseudoLA_*_LARGE nodes.
2995 SDValue Tmp = DAG.getConstant(0, DL, Ty);
2996
2997 // Use a PC-relative addressing mode to access the dynamic GOT address.
2998 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
2999 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3000 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3001
3002 // Prepare argument list to generate call.
3003 ArgListTy Args;
3004 ArgListEntry Entry;
3005 Entry.Node = Load;
3006 Entry.Ty = CallTy;
3007 Args.push_back(Entry);
3008
3009 // Setup call to __tls_get_addr.
3010 TargetLowering::CallLoweringInfo CLI(DAG);
3011 CLI.setDebugLoc(DL)
3012 .setChain(DAG.getEntryNode())
3013 .setLibCallee(CallingConv::C, CallTy,
3014 DAG.getExternalSymbol("__tls_get_addr", Ty),
3015 std::move(Args));
3016
3017 return LowerCallTo(CLI).first;
3018 }
3019
getTLSDescAddr(GlobalAddressSDNode * N,SelectionDAG & DAG,unsigned Opc,bool Large) const3020 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3021 SelectionDAG &DAG, unsigned Opc,
3022 bool Large) const {
3023 SDLoc DL(N);
3024 EVT Ty = getPointerTy(DAG.getDataLayout());
3025 const GlobalValue *GV = N->getGlobal();
3026
3027 // This is not actually used, but is necessary for successfully matching the
3028 // PseudoLA_*_LARGE nodes.
3029 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3030
3031 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3032 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3033 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3034 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3035 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3036 }
3037
3038 SDValue
lowerGlobalTLSAddress(SDValue Op,SelectionDAG & DAG) const3039 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3040 SelectionDAG &DAG) const {
3041 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3042 CallingConv::GHC)
3043 report_fatal_error("In GHC calling convention TLS is not supported");
3044
3045 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3046 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3047
3048 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3049 assert(N->getOffset() == 0 && "unexpected offset in global node");
3050
3051 if (DAG.getTarget().useEmulatedTLS())
3052 reportFatalUsageError("the emulated TLS is prohibited");
3053
3054 bool IsDesc = DAG.getTarget().useTLSDESC();
3055
3056 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3057 case TLSModel::GeneralDynamic:
3058 // In this model, application code calls the dynamic linker function
3059 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3060 // runtime.
3061 if (!IsDesc)
3062 return getDynamicTLSAddr(N, DAG,
3063 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3064 : LoongArch::PseudoLA_TLS_GD,
3065 Large);
3066 break;
3067 case TLSModel::LocalDynamic:
3068 // Same as GeneralDynamic, except for assembly modifiers and relocation
3069 // records.
3070 if (!IsDesc)
3071 return getDynamicTLSAddr(N, DAG,
3072 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3073 : LoongArch::PseudoLA_TLS_LD,
3074 Large);
3075 break;
3076 case TLSModel::InitialExec:
3077 // This model uses the GOT to resolve TLS offsets.
3078 return getStaticTLSAddr(N, DAG,
3079 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3080 : LoongArch::PseudoLA_TLS_IE,
3081 /*UseGOT=*/true, Large);
3082 case TLSModel::LocalExec:
3083 // This model is used when static linking as the TLS offsets are resolved
3084 // during program linking.
3085 //
3086 // This node doesn't need an extra argument for the large code model.
3087 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3088 /*UseGOT=*/false, Large);
3089 }
3090
3091 return getTLSDescAddr(N, DAG,
3092 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3093 : LoongArch::PseudoLA_TLS_DESC,
3094 Large);
3095 }
3096
3097 template <unsigned N>
checkIntrinsicImmArg(SDValue Op,unsigned ImmOp,SelectionDAG & DAG,bool IsSigned=false)3098 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3099 SelectionDAG &DAG, bool IsSigned = false) {
3100 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3101 // Check the ImmArg.
3102 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3103 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3104 DAG.getContext()->emitError(Op->getOperationName(0) +
3105 ": argument out of range.");
3106 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3107 }
3108 return SDValue();
3109 }
3110
3111 SDValue
lowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const3112 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3113 SelectionDAG &DAG) const {
3114 switch (Op.getConstantOperandVal(0)) {
3115 default:
3116 return SDValue(); // Don't custom lower most intrinsics.
3117 case Intrinsic::thread_pointer: {
3118 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3119 return DAG.getRegister(LoongArch::R2, PtrVT);
3120 }
3121 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3122 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3123 case Intrinsic::loongarch_lsx_vreplvei_d:
3124 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3125 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3126 case Intrinsic::loongarch_lsx_vreplvei_w:
3127 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3128 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3129 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3130 case Intrinsic::loongarch_lasx_xvpickve_d:
3131 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3132 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3133 case Intrinsic::loongarch_lasx_xvinsve0_d:
3134 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3135 case Intrinsic::loongarch_lsx_vsat_b:
3136 case Intrinsic::loongarch_lsx_vsat_bu:
3137 case Intrinsic::loongarch_lsx_vrotri_b:
3138 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3139 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3140 case Intrinsic::loongarch_lsx_vsrlri_b:
3141 case Intrinsic::loongarch_lsx_vsrari_b:
3142 case Intrinsic::loongarch_lsx_vreplvei_h:
3143 case Intrinsic::loongarch_lasx_xvsat_b:
3144 case Intrinsic::loongarch_lasx_xvsat_bu:
3145 case Intrinsic::loongarch_lasx_xvrotri_b:
3146 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3147 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3148 case Intrinsic::loongarch_lasx_xvsrlri_b:
3149 case Intrinsic::loongarch_lasx_xvsrari_b:
3150 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3151 case Intrinsic::loongarch_lasx_xvpickve_w:
3152 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3153 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3154 case Intrinsic::loongarch_lasx_xvinsve0_w:
3155 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3156 case Intrinsic::loongarch_lsx_vsat_h:
3157 case Intrinsic::loongarch_lsx_vsat_hu:
3158 case Intrinsic::loongarch_lsx_vrotri_h:
3159 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3160 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3161 case Intrinsic::loongarch_lsx_vsrlri_h:
3162 case Intrinsic::loongarch_lsx_vsrari_h:
3163 case Intrinsic::loongarch_lsx_vreplvei_b:
3164 case Intrinsic::loongarch_lasx_xvsat_h:
3165 case Intrinsic::loongarch_lasx_xvsat_hu:
3166 case Intrinsic::loongarch_lasx_xvrotri_h:
3167 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3168 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3169 case Intrinsic::loongarch_lasx_xvsrlri_h:
3170 case Intrinsic::loongarch_lasx_xvsrari_h:
3171 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3172 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3173 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3174 case Intrinsic::loongarch_lsx_vsrani_b_h:
3175 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3176 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3177 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3178 case Intrinsic::loongarch_lsx_vssrani_b_h:
3179 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3180 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3181 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3182 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3183 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3184 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3185 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3186 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3187 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3188 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3189 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3190 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3191 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3192 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3193 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3194 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3195 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3196 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3197 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3198 case Intrinsic::loongarch_lsx_vsat_w:
3199 case Intrinsic::loongarch_lsx_vsat_wu:
3200 case Intrinsic::loongarch_lsx_vrotri_w:
3201 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3202 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3203 case Intrinsic::loongarch_lsx_vsrlri_w:
3204 case Intrinsic::loongarch_lsx_vsrari_w:
3205 case Intrinsic::loongarch_lsx_vslei_bu:
3206 case Intrinsic::loongarch_lsx_vslei_hu:
3207 case Intrinsic::loongarch_lsx_vslei_wu:
3208 case Intrinsic::loongarch_lsx_vslei_du:
3209 case Intrinsic::loongarch_lsx_vslti_bu:
3210 case Intrinsic::loongarch_lsx_vslti_hu:
3211 case Intrinsic::loongarch_lsx_vslti_wu:
3212 case Intrinsic::loongarch_lsx_vslti_du:
3213 case Intrinsic::loongarch_lsx_vbsll_v:
3214 case Intrinsic::loongarch_lsx_vbsrl_v:
3215 case Intrinsic::loongarch_lasx_xvsat_w:
3216 case Intrinsic::loongarch_lasx_xvsat_wu:
3217 case Intrinsic::loongarch_lasx_xvrotri_w:
3218 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3219 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3220 case Intrinsic::loongarch_lasx_xvsrlri_w:
3221 case Intrinsic::loongarch_lasx_xvsrari_w:
3222 case Intrinsic::loongarch_lasx_xvslei_bu:
3223 case Intrinsic::loongarch_lasx_xvslei_hu:
3224 case Intrinsic::loongarch_lasx_xvslei_wu:
3225 case Intrinsic::loongarch_lasx_xvslei_du:
3226 case Intrinsic::loongarch_lasx_xvslti_bu:
3227 case Intrinsic::loongarch_lasx_xvslti_hu:
3228 case Intrinsic::loongarch_lasx_xvslti_wu:
3229 case Intrinsic::loongarch_lasx_xvslti_du:
3230 case Intrinsic::loongarch_lasx_xvbsll_v:
3231 case Intrinsic::loongarch_lasx_xvbsrl_v:
3232 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3233 case Intrinsic::loongarch_lsx_vseqi_b:
3234 case Intrinsic::loongarch_lsx_vseqi_h:
3235 case Intrinsic::loongarch_lsx_vseqi_w:
3236 case Intrinsic::loongarch_lsx_vseqi_d:
3237 case Intrinsic::loongarch_lsx_vslei_b:
3238 case Intrinsic::loongarch_lsx_vslei_h:
3239 case Intrinsic::loongarch_lsx_vslei_w:
3240 case Intrinsic::loongarch_lsx_vslei_d:
3241 case Intrinsic::loongarch_lsx_vslti_b:
3242 case Intrinsic::loongarch_lsx_vslti_h:
3243 case Intrinsic::loongarch_lsx_vslti_w:
3244 case Intrinsic::loongarch_lsx_vslti_d:
3245 case Intrinsic::loongarch_lasx_xvseqi_b:
3246 case Intrinsic::loongarch_lasx_xvseqi_h:
3247 case Intrinsic::loongarch_lasx_xvseqi_w:
3248 case Intrinsic::loongarch_lasx_xvseqi_d:
3249 case Intrinsic::loongarch_lasx_xvslei_b:
3250 case Intrinsic::loongarch_lasx_xvslei_h:
3251 case Intrinsic::loongarch_lasx_xvslei_w:
3252 case Intrinsic::loongarch_lasx_xvslei_d:
3253 case Intrinsic::loongarch_lasx_xvslti_b:
3254 case Intrinsic::loongarch_lasx_xvslti_h:
3255 case Intrinsic::loongarch_lasx_xvslti_w:
3256 case Intrinsic::loongarch_lasx_xvslti_d:
3257 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3258 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3259 case Intrinsic::loongarch_lsx_vsrani_h_w:
3260 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3261 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3262 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3263 case Intrinsic::loongarch_lsx_vssrani_h_w:
3264 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3265 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3266 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3267 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3268 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3269 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3270 case Intrinsic::loongarch_lsx_vfrstpi_b:
3271 case Intrinsic::loongarch_lsx_vfrstpi_h:
3272 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3273 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3274 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3275 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3276 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3277 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3278 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3279 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3280 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3281 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3282 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3283 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3284 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3285 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3286 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3287 case Intrinsic::loongarch_lsx_vsat_d:
3288 case Intrinsic::loongarch_lsx_vsat_du:
3289 case Intrinsic::loongarch_lsx_vrotri_d:
3290 case Intrinsic::loongarch_lsx_vsrlri_d:
3291 case Intrinsic::loongarch_lsx_vsrari_d:
3292 case Intrinsic::loongarch_lasx_xvsat_d:
3293 case Intrinsic::loongarch_lasx_xvsat_du:
3294 case Intrinsic::loongarch_lasx_xvrotri_d:
3295 case Intrinsic::loongarch_lasx_xvsrlri_d:
3296 case Intrinsic::loongarch_lasx_xvsrari_d:
3297 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3298 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3299 case Intrinsic::loongarch_lsx_vsrani_w_d:
3300 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3301 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3302 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3303 case Intrinsic::loongarch_lsx_vssrani_w_d:
3304 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3305 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3306 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3307 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3308 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3309 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3310 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3311 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3312 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3313 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3314 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3315 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3316 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3317 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3318 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3319 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3320 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3321 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3322 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3323 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3324 case Intrinsic::loongarch_lsx_vsrani_d_q:
3325 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3326 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3327 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3328 case Intrinsic::loongarch_lsx_vssrani_d_q:
3329 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3330 case Intrinsic::loongarch_lsx_vssrani_du_q:
3331 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3332 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3333 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3334 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3335 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3336 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3337 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3338 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3339 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3340 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3341 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3342 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3343 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3344 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3345 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3346 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3347 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3348 case Intrinsic::loongarch_lsx_vnori_b:
3349 case Intrinsic::loongarch_lsx_vshuf4i_b:
3350 case Intrinsic::loongarch_lsx_vshuf4i_h:
3351 case Intrinsic::loongarch_lsx_vshuf4i_w:
3352 case Intrinsic::loongarch_lasx_xvnori_b:
3353 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3354 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3355 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3356 case Intrinsic::loongarch_lasx_xvpermi_d:
3357 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3358 case Intrinsic::loongarch_lsx_vshuf4i_d:
3359 case Intrinsic::loongarch_lsx_vpermi_w:
3360 case Intrinsic::loongarch_lsx_vbitseli_b:
3361 case Intrinsic::loongarch_lsx_vextrins_b:
3362 case Intrinsic::loongarch_lsx_vextrins_h:
3363 case Intrinsic::loongarch_lsx_vextrins_w:
3364 case Intrinsic::loongarch_lsx_vextrins_d:
3365 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3366 case Intrinsic::loongarch_lasx_xvpermi_w:
3367 case Intrinsic::loongarch_lasx_xvpermi_q:
3368 case Intrinsic::loongarch_lasx_xvbitseli_b:
3369 case Intrinsic::loongarch_lasx_xvextrins_b:
3370 case Intrinsic::loongarch_lasx_xvextrins_h:
3371 case Intrinsic::loongarch_lasx_xvextrins_w:
3372 case Intrinsic::loongarch_lasx_xvextrins_d:
3373 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3374 case Intrinsic::loongarch_lsx_vrepli_b:
3375 case Intrinsic::loongarch_lsx_vrepli_h:
3376 case Intrinsic::loongarch_lsx_vrepli_w:
3377 case Intrinsic::loongarch_lsx_vrepli_d:
3378 case Intrinsic::loongarch_lasx_xvrepli_b:
3379 case Intrinsic::loongarch_lasx_xvrepli_h:
3380 case Intrinsic::loongarch_lasx_xvrepli_w:
3381 case Intrinsic::loongarch_lasx_xvrepli_d:
3382 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3383 case Intrinsic::loongarch_lsx_vldi:
3384 case Intrinsic::loongarch_lasx_xvldi:
3385 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3386 }
3387 }
3388
3389 // Helper function that emits error message for intrinsics with chain and return
3390 // merge values of a UNDEF and the chain.
emitIntrinsicWithChainErrorMessage(SDValue Op,StringRef ErrorMsg,SelectionDAG & DAG)3391 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3392 StringRef ErrorMsg,
3393 SelectionDAG &DAG) {
3394 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3395 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3396 SDLoc(Op));
3397 }
3398
3399 SDValue
lowerINTRINSIC_W_CHAIN(SDValue Op,SelectionDAG & DAG) const3400 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3401 SelectionDAG &DAG) const {
3402 SDLoc DL(Op);
3403 MVT GRLenVT = Subtarget.getGRLenVT();
3404 EVT VT = Op.getValueType();
3405 SDValue Chain = Op.getOperand(0);
3406 const StringRef ErrorMsgOOR = "argument out of range";
3407 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3408 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3409
3410 switch (Op.getConstantOperandVal(1)) {
3411 default:
3412 return Op;
3413 case Intrinsic::loongarch_crc_w_b_w:
3414 case Intrinsic::loongarch_crc_w_h_w:
3415 case Intrinsic::loongarch_crc_w_w_w:
3416 case Intrinsic::loongarch_crc_w_d_w:
3417 case Intrinsic::loongarch_crcc_w_b_w:
3418 case Intrinsic::loongarch_crcc_w_h_w:
3419 case Intrinsic::loongarch_crcc_w_w_w:
3420 case Intrinsic::loongarch_crcc_w_d_w:
3421 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3422 case Intrinsic::loongarch_csrrd_w:
3423 case Intrinsic::loongarch_csrrd_d: {
3424 unsigned Imm = Op.getConstantOperandVal(2);
3425 return !isUInt<14>(Imm)
3426 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3427 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3428 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3429 }
3430 case Intrinsic::loongarch_csrwr_w:
3431 case Intrinsic::loongarch_csrwr_d: {
3432 unsigned Imm = Op.getConstantOperandVal(3);
3433 return !isUInt<14>(Imm)
3434 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3435 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3436 {Chain, Op.getOperand(2),
3437 DAG.getConstant(Imm, DL, GRLenVT)});
3438 }
3439 case Intrinsic::loongarch_csrxchg_w:
3440 case Intrinsic::loongarch_csrxchg_d: {
3441 unsigned Imm = Op.getConstantOperandVal(4);
3442 return !isUInt<14>(Imm)
3443 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3444 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3445 {Chain, Op.getOperand(2), Op.getOperand(3),
3446 DAG.getConstant(Imm, DL, GRLenVT)});
3447 }
3448 case Intrinsic::loongarch_iocsrrd_d: {
3449 return DAG.getNode(
3450 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3451 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3452 }
3453 #define IOCSRRD_CASE(NAME, NODE) \
3454 case Intrinsic::loongarch_##NAME: { \
3455 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3456 {Chain, Op.getOperand(2)}); \
3457 }
3458 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3459 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3460 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3461 #undef IOCSRRD_CASE
3462 case Intrinsic::loongarch_cpucfg: {
3463 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3464 {Chain, Op.getOperand(2)});
3465 }
3466 case Intrinsic::loongarch_lddir_d: {
3467 unsigned Imm = Op.getConstantOperandVal(3);
3468 return !isUInt<8>(Imm)
3469 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3470 : Op;
3471 }
3472 case Intrinsic::loongarch_movfcsr2gr: {
3473 if (!Subtarget.hasBasicF())
3474 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3475 unsigned Imm = Op.getConstantOperandVal(2);
3476 return !isUInt<2>(Imm)
3477 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3478 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3479 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3480 }
3481 case Intrinsic::loongarch_lsx_vld:
3482 case Intrinsic::loongarch_lsx_vldrepl_b:
3483 case Intrinsic::loongarch_lasx_xvld:
3484 case Intrinsic::loongarch_lasx_xvldrepl_b:
3485 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3486 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3487 : SDValue();
3488 case Intrinsic::loongarch_lsx_vldrepl_h:
3489 case Intrinsic::loongarch_lasx_xvldrepl_h:
3490 return !isShiftedInt<11, 1>(
3491 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3492 ? emitIntrinsicWithChainErrorMessage(
3493 Op, "argument out of range or not a multiple of 2", DAG)
3494 : SDValue();
3495 case Intrinsic::loongarch_lsx_vldrepl_w:
3496 case Intrinsic::loongarch_lasx_xvldrepl_w:
3497 return !isShiftedInt<10, 2>(
3498 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3499 ? emitIntrinsicWithChainErrorMessage(
3500 Op, "argument out of range or not a multiple of 4", DAG)
3501 : SDValue();
3502 case Intrinsic::loongarch_lsx_vldrepl_d:
3503 case Intrinsic::loongarch_lasx_xvldrepl_d:
3504 return !isShiftedInt<9, 3>(
3505 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3506 ? emitIntrinsicWithChainErrorMessage(
3507 Op, "argument out of range or not a multiple of 8", DAG)
3508 : SDValue();
3509 }
3510 }
3511
3512 // Helper function that emits error message for intrinsics with void return
3513 // value and return the chain.
emitIntrinsicErrorMessage(SDValue Op,StringRef ErrorMsg,SelectionDAG & DAG)3514 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3515 SelectionDAG &DAG) {
3516
3517 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3518 return Op.getOperand(0);
3519 }
3520
lowerINTRINSIC_VOID(SDValue Op,SelectionDAG & DAG) const3521 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3522 SelectionDAG &DAG) const {
3523 SDLoc DL(Op);
3524 MVT GRLenVT = Subtarget.getGRLenVT();
3525 SDValue Chain = Op.getOperand(0);
3526 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3527 SDValue Op2 = Op.getOperand(2);
3528 const StringRef ErrorMsgOOR = "argument out of range";
3529 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3530 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3531 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3532
3533 switch (IntrinsicEnum) {
3534 default:
3535 // TODO: Add more Intrinsics.
3536 return SDValue();
3537 case Intrinsic::loongarch_cacop_d:
3538 case Intrinsic::loongarch_cacop_w: {
3539 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3540 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3541 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3542 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3543 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3544 unsigned Imm1 = Op2->getAsZExtVal();
3545 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3546 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3547 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3548 return Op;
3549 }
3550 case Intrinsic::loongarch_dbar: {
3551 unsigned Imm = Op2->getAsZExtVal();
3552 return !isUInt<15>(Imm)
3553 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3554 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3555 DAG.getConstant(Imm, DL, GRLenVT));
3556 }
3557 case Intrinsic::loongarch_ibar: {
3558 unsigned Imm = Op2->getAsZExtVal();
3559 return !isUInt<15>(Imm)
3560 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3561 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3562 DAG.getConstant(Imm, DL, GRLenVT));
3563 }
3564 case Intrinsic::loongarch_break: {
3565 unsigned Imm = Op2->getAsZExtVal();
3566 return !isUInt<15>(Imm)
3567 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3568 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3569 DAG.getConstant(Imm, DL, GRLenVT));
3570 }
3571 case Intrinsic::loongarch_movgr2fcsr: {
3572 if (!Subtarget.hasBasicF())
3573 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3574 unsigned Imm = Op2->getAsZExtVal();
3575 return !isUInt<2>(Imm)
3576 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3577 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3578 DAG.getConstant(Imm, DL, GRLenVT),
3579 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3580 Op.getOperand(3)));
3581 }
3582 case Intrinsic::loongarch_syscall: {
3583 unsigned Imm = Op2->getAsZExtVal();
3584 return !isUInt<15>(Imm)
3585 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3586 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3587 DAG.getConstant(Imm, DL, GRLenVT));
3588 }
3589 #define IOCSRWR_CASE(NAME, NODE) \
3590 case Intrinsic::loongarch_##NAME: { \
3591 SDValue Op3 = Op.getOperand(3); \
3592 return Subtarget.is64Bit() \
3593 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3594 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3595 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3596 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3597 Op3); \
3598 }
3599 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3600 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3601 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3602 #undef IOCSRWR_CASE
3603 case Intrinsic::loongarch_iocsrwr_d: {
3604 return !Subtarget.is64Bit()
3605 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3606 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3607 Op2,
3608 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3609 Op.getOperand(3)));
3610 }
3611 #define ASRT_LE_GT_CASE(NAME) \
3612 case Intrinsic::loongarch_##NAME: { \
3613 return !Subtarget.is64Bit() \
3614 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3615 : Op; \
3616 }
3617 ASRT_LE_GT_CASE(asrtle_d)
3618 ASRT_LE_GT_CASE(asrtgt_d)
3619 #undef ASRT_LE_GT_CASE
3620 case Intrinsic::loongarch_ldpte_d: {
3621 unsigned Imm = Op.getConstantOperandVal(3);
3622 return !Subtarget.is64Bit()
3623 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3624 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3625 : Op;
3626 }
3627 case Intrinsic::loongarch_lsx_vst:
3628 case Intrinsic::loongarch_lasx_xvst:
3629 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3630 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3631 : SDValue();
3632 case Intrinsic::loongarch_lasx_xvstelm_b:
3633 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3634 !isUInt<5>(Op.getConstantOperandVal(5)))
3635 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3636 : SDValue();
3637 case Intrinsic::loongarch_lsx_vstelm_b:
3638 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3639 !isUInt<4>(Op.getConstantOperandVal(5)))
3640 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3641 : SDValue();
3642 case Intrinsic::loongarch_lasx_xvstelm_h:
3643 return (!isShiftedInt<8, 1>(
3644 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3645 !isUInt<4>(Op.getConstantOperandVal(5)))
3646 ? emitIntrinsicErrorMessage(
3647 Op, "argument out of range or not a multiple of 2", DAG)
3648 : SDValue();
3649 case Intrinsic::loongarch_lsx_vstelm_h:
3650 return (!isShiftedInt<8, 1>(
3651 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3652 !isUInt<3>(Op.getConstantOperandVal(5)))
3653 ? emitIntrinsicErrorMessage(
3654 Op, "argument out of range or not a multiple of 2", DAG)
3655 : SDValue();
3656 case Intrinsic::loongarch_lasx_xvstelm_w:
3657 return (!isShiftedInt<8, 2>(
3658 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3659 !isUInt<3>(Op.getConstantOperandVal(5)))
3660 ? emitIntrinsicErrorMessage(
3661 Op, "argument out of range or not a multiple of 4", DAG)
3662 : SDValue();
3663 case Intrinsic::loongarch_lsx_vstelm_w:
3664 return (!isShiftedInt<8, 2>(
3665 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3666 !isUInt<2>(Op.getConstantOperandVal(5)))
3667 ? emitIntrinsicErrorMessage(
3668 Op, "argument out of range or not a multiple of 4", DAG)
3669 : SDValue();
3670 case Intrinsic::loongarch_lasx_xvstelm_d:
3671 return (!isShiftedInt<8, 3>(
3672 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3673 !isUInt<2>(Op.getConstantOperandVal(5)))
3674 ? emitIntrinsicErrorMessage(
3675 Op, "argument out of range or not a multiple of 8", DAG)
3676 : SDValue();
3677 case Intrinsic::loongarch_lsx_vstelm_d:
3678 return (!isShiftedInt<8, 3>(
3679 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3680 !isUInt<1>(Op.getConstantOperandVal(5)))
3681 ? emitIntrinsicErrorMessage(
3682 Op, "argument out of range or not a multiple of 8", DAG)
3683 : SDValue();
3684 }
3685 }
3686
lowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const3687 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3688 SelectionDAG &DAG) const {
3689 SDLoc DL(Op);
3690 SDValue Lo = Op.getOperand(0);
3691 SDValue Hi = Op.getOperand(1);
3692 SDValue Shamt = Op.getOperand(2);
3693 EVT VT = Lo.getValueType();
3694
3695 // if Shamt-GRLen < 0: // Shamt < GRLen
3696 // Lo = Lo << Shamt
3697 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
3698 // else:
3699 // Lo = 0
3700 // Hi = Lo << (Shamt-GRLen)
3701
3702 SDValue Zero = DAG.getConstant(0, DL, VT);
3703 SDValue One = DAG.getConstant(1, DL, VT);
3704 SDValue MinusGRLen =
3705 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3706 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3707 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3708 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3709
3710 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3711 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3712 SDValue ShiftRightLo =
3713 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
3714 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3715 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3716 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
3717
3718 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3719
3720 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3721 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3722
3723 SDValue Parts[2] = {Lo, Hi};
3724 return DAG.getMergeValues(Parts, DL);
3725 }
3726
lowerShiftRightParts(SDValue Op,SelectionDAG & DAG,bool IsSRA) const3727 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
3728 SelectionDAG &DAG,
3729 bool IsSRA) const {
3730 SDLoc DL(Op);
3731 SDValue Lo = Op.getOperand(0);
3732 SDValue Hi = Op.getOperand(1);
3733 SDValue Shamt = Op.getOperand(2);
3734 EVT VT = Lo.getValueType();
3735
3736 // SRA expansion:
3737 // if Shamt-GRLen < 0: // Shamt < GRLen
3738 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3739 // Hi = Hi >>s Shamt
3740 // else:
3741 // Lo = Hi >>s (Shamt-GRLen);
3742 // Hi = Hi >>s (GRLen-1)
3743 //
3744 // SRL expansion:
3745 // if Shamt-GRLen < 0: // Shamt < GRLen
3746 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3747 // Hi = Hi >>u Shamt
3748 // else:
3749 // Lo = Hi >>u (Shamt-GRLen);
3750 // Hi = 0;
3751
3752 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3753
3754 SDValue Zero = DAG.getConstant(0, DL, VT);
3755 SDValue One = DAG.getConstant(1, DL, VT);
3756 SDValue MinusGRLen =
3757 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3758 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3759 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3760 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3761
3762 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3763 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3764 SDValue ShiftLeftHi =
3765 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
3766 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3767 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3768 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
3769 SDValue HiFalse =
3770 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
3771
3772 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3773
3774 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3775 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3776
3777 SDValue Parts[2] = {Lo, Hi};
3778 return DAG.getMergeValues(Parts, DL);
3779 }
3780
3781 // Returns the opcode of the target-specific SDNode that implements the 32-bit
3782 // form of the given Opcode.
getLoongArchWOpcode(unsigned Opcode)3783 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
3784 switch (Opcode) {
3785 default:
3786 llvm_unreachable("Unexpected opcode");
3787 case ISD::SDIV:
3788 return LoongArchISD::DIV_W;
3789 case ISD::UDIV:
3790 return LoongArchISD::DIV_WU;
3791 case ISD::SREM:
3792 return LoongArchISD::MOD_W;
3793 case ISD::UREM:
3794 return LoongArchISD::MOD_WU;
3795 case ISD::SHL:
3796 return LoongArchISD::SLL_W;
3797 case ISD::SRA:
3798 return LoongArchISD::SRA_W;
3799 case ISD::SRL:
3800 return LoongArchISD::SRL_W;
3801 case ISD::ROTL:
3802 case ISD::ROTR:
3803 return LoongArchISD::ROTR_W;
3804 case ISD::CTTZ:
3805 return LoongArchISD::CTZ_W;
3806 case ISD::CTLZ:
3807 return LoongArchISD::CLZ_W;
3808 }
3809 }
3810
3811 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
3812 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
3813 // otherwise be promoted to i64, making it difficult to select the
3814 // SLL_W/.../*W later one because the fact the operation was originally of
3815 // type i8/i16/i32 is lost.
customLegalizeToWOp(SDNode * N,SelectionDAG & DAG,int NumOp,unsigned ExtOpc=ISD::ANY_EXTEND)3816 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
3817 unsigned ExtOpc = ISD::ANY_EXTEND) {
3818 SDLoc DL(N);
3819 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
3820 SDValue NewOp0, NewRes;
3821
3822 switch (NumOp) {
3823 default:
3824 llvm_unreachable("Unexpected NumOp");
3825 case 1: {
3826 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3827 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
3828 break;
3829 }
3830 case 2: {
3831 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3832 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3833 if (N->getOpcode() == ISD::ROTL) {
3834 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
3835 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
3836 }
3837 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3838 break;
3839 }
3840 // TODO:Handle more NumOp.
3841 }
3842
3843 // ReplaceNodeResults requires we maintain the same type for the return
3844 // value.
3845 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3846 }
3847
3848 // Converts the given 32-bit operation to a i64 operation with signed extension
3849 // semantic to reduce the signed extension instructions.
customLegalizeToWOpWithSExt(SDNode * N,SelectionDAG & DAG)3850 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3851 SDLoc DL(N);
3852 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3853 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3854 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
3855 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
3856 DAG.getValueType(MVT::i32));
3857 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
3858 }
3859
3860 // Helper function that emits error message for intrinsics with/without chain
3861 // and return a UNDEF or and the chain as the results.
emitErrorAndReplaceIntrinsicResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,StringRef ErrorMsg,bool WithChain=true)3862 static void emitErrorAndReplaceIntrinsicResults(
3863 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
3864 StringRef ErrorMsg, bool WithChain = true) {
3865 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
3866 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
3867 if (!WithChain)
3868 return;
3869 Results.push_back(N->getOperand(0));
3870 }
3871
3872 template <unsigned N>
3873 static void
replaceVPICKVE2GRResults(SDNode * Node,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget,unsigned ResOp)3874 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
3875 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
3876 unsigned ResOp) {
3877 const StringRef ErrorMsgOOR = "argument out of range";
3878 unsigned Imm = Node->getConstantOperandVal(2);
3879 if (!isUInt<N>(Imm)) {
3880 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
3881 /*WithChain=*/false);
3882 return;
3883 }
3884 SDLoc DL(Node);
3885 SDValue Vec = Node->getOperand(1);
3886
3887 SDValue PickElt =
3888 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
3889 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
3890 DAG.getValueType(Vec.getValueType().getVectorElementType()));
3891 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
3892 PickElt.getValue(0)));
3893 }
3894
replaceVecCondBranchResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget,unsigned ResOp)3895 static void replaceVecCondBranchResults(SDNode *N,
3896 SmallVectorImpl<SDValue> &Results,
3897 SelectionDAG &DAG,
3898 const LoongArchSubtarget &Subtarget,
3899 unsigned ResOp) {
3900 SDLoc DL(N);
3901 SDValue Vec = N->getOperand(1);
3902
3903 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
3904 Results.push_back(
3905 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
3906 }
3907
3908 static void
replaceINTRINSIC_WO_CHAINResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget)3909 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
3910 SelectionDAG &DAG,
3911 const LoongArchSubtarget &Subtarget) {
3912 switch (N->getConstantOperandVal(0)) {
3913 default:
3914 llvm_unreachable("Unexpected Intrinsic.");
3915 case Intrinsic::loongarch_lsx_vpickve2gr_b:
3916 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
3917 LoongArchISD::VPICK_SEXT_ELT);
3918 break;
3919 case Intrinsic::loongarch_lsx_vpickve2gr_h:
3920 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
3921 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
3922 LoongArchISD::VPICK_SEXT_ELT);
3923 break;
3924 case Intrinsic::loongarch_lsx_vpickve2gr_w:
3925 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
3926 LoongArchISD::VPICK_SEXT_ELT);
3927 break;
3928 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
3929 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
3930 LoongArchISD::VPICK_ZEXT_ELT);
3931 break;
3932 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
3933 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
3934 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
3935 LoongArchISD::VPICK_ZEXT_ELT);
3936 break;
3937 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
3938 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
3939 LoongArchISD::VPICK_ZEXT_ELT);
3940 break;
3941 case Intrinsic::loongarch_lsx_bz_b:
3942 case Intrinsic::loongarch_lsx_bz_h:
3943 case Intrinsic::loongarch_lsx_bz_w:
3944 case Intrinsic::loongarch_lsx_bz_d:
3945 case Intrinsic::loongarch_lasx_xbz_b:
3946 case Intrinsic::loongarch_lasx_xbz_h:
3947 case Intrinsic::loongarch_lasx_xbz_w:
3948 case Intrinsic::loongarch_lasx_xbz_d:
3949 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3950 LoongArchISD::VALL_ZERO);
3951 break;
3952 case Intrinsic::loongarch_lsx_bz_v:
3953 case Intrinsic::loongarch_lasx_xbz_v:
3954 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3955 LoongArchISD::VANY_ZERO);
3956 break;
3957 case Intrinsic::loongarch_lsx_bnz_b:
3958 case Intrinsic::loongarch_lsx_bnz_h:
3959 case Intrinsic::loongarch_lsx_bnz_w:
3960 case Intrinsic::loongarch_lsx_bnz_d:
3961 case Intrinsic::loongarch_lasx_xbnz_b:
3962 case Intrinsic::loongarch_lasx_xbnz_h:
3963 case Intrinsic::loongarch_lasx_xbnz_w:
3964 case Intrinsic::loongarch_lasx_xbnz_d:
3965 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3966 LoongArchISD::VALL_NONZERO);
3967 break;
3968 case Intrinsic::loongarch_lsx_bnz_v:
3969 case Intrinsic::loongarch_lasx_xbnz_v:
3970 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3971 LoongArchISD::VANY_NONZERO);
3972 break;
3973 }
3974 }
3975
replaceCMP_XCHG_128Results(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG)3976 static void replaceCMP_XCHG_128Results(SDNode *N,
3977 SmallVectorImpl<SDValue> &Results,
3978 SelectionDAG &DAG) {
3979 assert(N->getValueType(0) == MVT::i128 &&
3980 "AtomicCmpSwap on types less than 128 should be legal");
3981 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3982
3983 unsigned Opcode;
3984 switch (MemOp->getMergedOrdering()) {
3985 case AtomicOrdering::Acquire:
3986 case AtomicOrdering::AcquireRelease:
3987 case AtomicOrdering::SequentiallyConsistent:
3988 Opcode = LoongArch::PseudoCmpXchg128Acquire;
3989 break;
3990 case AtomicOrdering::Monotonic:
3991 case AtomicOrdering::Release:
3992 Opcode = LoongArch::PseudoCmpXchg128;
3993 break;
3994 default:
3995 llvm_unreachable("Unexpected ordering!");
3996 }
3997
3998 SDLoc DL(N);
3999 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4000 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4001 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4002 NewVal.first, NewVal.second, N->getOperand(0)};
4003
4004 SDNode *CmpSwap = DAG.getMachineNode(
4005 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4006 Ops);
4007 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4008 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4009 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4010 Results.push_back(SDValue(CmpSwap, 3));
4011 }
4012
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const4013 void LoongArchTargetLowering::ReplaceNodeResults(
4014 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4015 SDLoc DL(N);
4016 EVT VT = N->getValueType(0);
4017 switch (N->getOpcode()) {
4018 default:
4019 llvm_unreachable("Don't know how to legalize this operation");
4020 case ISD::ADD:
4021 case ISD::SUB:
4022 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4023 "Unexpected custom legalisation");
4024 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4025 break;
4026 case ISD::SDIV:
4027 case ISD::UDIV:
4028 case ISD::SREM:
4029 case ISD::UREM:
4030 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4031 "Unexpected custom legalisation");
4032 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4033 Subtarget.hasDiv32() && VT == MVT::i32
4034 ? ISD::ANY_EXTEND
4035 : ISD::SIGN_EXTEND));
4036 break;
4037 case ISD::SHL:
4038 case ISD::SRA:
4039 case ISD::SRL:
4040 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4041 "Unexpected custom legalisation");
4042 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4043 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4044 break;
4045 }
4046 break;
4047 case ISD::ROTL:
4048 case ISD::ROTR:
4049 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4050 "Unexpected custom legalisation");
4051 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4052 break;
4053 case ISD::FP_TO_SINT: {
4054 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4055 "Unexpected custom legalisation");
4056 SDValue Src = N->getOperand(0);
4057 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4058 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4059 TargetLowering::TypeSoftenFloat) {
4060 if (!isTypeLegal(Src.getValueType()))
4061 return;
4062 if (Src.getValueType() == MVT::f16)
4063 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4064 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4065 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4066 return;
4067 }
4068 // If the FP type needs to be softened, emit a library call using the 'si'
4069 // version. If we left it to default legalization we'd end up with 'di'.
4070 RTLIB::Libcall LC;
4071 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4072 MakeLibCallOptions CallOptions;
4073 EVT OpVT = Src.getValueType();
4074 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
4075 SDValue Chain = SDValue();
4076 SDValue Result;
4077 std::tie(Result, Chain) =
4078 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4079 Results.push_back(Result);
4080 break;
4081 }
4082 case ISD::BITCAST: {
4083 SDValue Src = N->getOperand(0);
4084 EVT SrcVT = Src.getValueType();
4085 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4086 Subtarget.hasBasicF()) {
4087 SDValue Dst =
4088 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4089 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4090 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4091 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4092 DAG.getVTList(MVT::i32, MVT::i32), Src);
4093 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4094 NewReg.getValue(0), NewReg.getValue(1));
4095 Results.push_back(RetReg);
4096 }
4097 break;
4098 }
4099 case ISD::FP_TO_UINT: {
4100 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4101 "Unexpected custom legalisation");
4102 auto &TLI = DAG.getTargetLoweringInfo();
4103 SDValue Tmp1, Tmp2;
4104 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4105 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4106 break;
4107 }
4108 case ISD::BSWAP: {
4109 SDValue Src = N->getOperand(0);
4110 assert((VT == MVT::i16 || VT == MVT::i32) &&
4111 "Unexpected custom legalization");
4112 MVT GRLenVT = Subtarget.getGRLenVT();
4113 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4114 SDValue Tmp;
4115 switch (VT.getSizeInBits()) {
4116 default:
4117 llvm_unreachable("Unexpected operand width");
4118 case 16:
4119 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4120 break;
4121 case 32:
4122 // Only LA64 will get to here due to the size mismatch between VT and
4123 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4124 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4125 break;
4126 }
4127 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4128 break;
4129 }
4130 case ISD::BITREVERSE: {
4131 SDValue Src = N->getOperand(0);
4132 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4133 "Unexpected custom legalization");
4134 MVT GRLenVT = Subtarget.getGRLenVT();
4135 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4136 SDValue Tmp;
4137 switch (VT.getSizeInBits()) {
4138 default:
4139 llvm_unreachable("Unexpected operand width");
4140 case 8:
4141 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4142 break;
4143 case 32:
4144 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4145 break;
4146 }
4147 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4148 break;
4149 }
4150 case ISD::CTLZ:
4151 case ISD::CTTZ: {
4152 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4153 "Unexpected custom legalisation");
4154 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4155 break;
4156 }
4157 case ISD::INTRINSIC_W_CHAIN: {
4158 SDValue Chain = N->getOperand(0);
4159 SDValue Op2 = N->getOperand(2);
4160 MVT GRLenVT = Subtarget.getGRLenVT();
4161 const StringRef ErrorMsgOOR = "argument out of range";
4162 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4163 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4164
4165 switch (N->getConstantOperandVal(1)) {
4166 default:
4167 llvm_unreachable("Unexpected Intrinsic.");
4168 case Intrinsic::loongarch_movfcsr2gr: {
4169 if (!Subtarget.hasBasicF()) {
4170 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4171 return;
4172 }
4173 unsigned Imm = Op2->getAsZExtVal();
4174 if (!isUInt<2>(Imm)) {
4175 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4176 return;
4177 }
4178 SDValue MOVFCSR2GRResults = DAG.getNode(
4179 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4180 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4181 Results.push_back(
4182 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4183 Results.push_back(MOVFCSR2GRResults.getValue(1));
4184 break;
4185 }
4186 #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4187 case Intrinsic::loongarch_##NAME: { \
4188 SDValue NODE = DAG.getNode( \
4189 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4190 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4191 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4192 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4193 Results.push_back(NODE.getValue(1)); \
4194 break; \
4195 }
4196 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4197 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4198 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4199 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4200 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4201 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4202 #undef CRC_CASE_EXT_BINARYOP
4203
4204 #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4205 case Intrinsic::loongarch_##NAME: { \
4206 SDValue NODE = DAG.getNode( \
4207 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4208 {Chain, Op2, \
4209 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4210 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4211 Results.push_back(NODE.getValue(1)); \
4212 break; \
4213 }
4214 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4215 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4216 #undef CRC_CASE_EXT_UNARYOP
4217 #define CSR_CASE(ID) \
4218 case Intrinsic::loongarch_##ID: { \
4219 if (!Subtarget.is64Bit()) \
4220 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4221 break; \
4222 }
4223 CSR_CASE(csrrd_d);
4224 CSR_CASE(csrwr_d);
4225 CSR_CASE(csrxchg_d);
4226 CSR_CASE(iocsrrd_d);
4227 #undef CSR_CASE
4228 case Intrinsic::loongarch_csrrd_w: {
4229 unsigned Imm = Op2->getAsZExtVal();
4230 if (!isUInt<14>(Imm)) {
4231 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4232 return;
4233 }
4234 SDValue CSRRDResults =
4235 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4236 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4237 Results.push_back(
4238 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4239 Results.push_back(CSRRDResults.getValue(1));
4240 break;
4241 }
4242 case Intrinsic::loongarch_csrwr_w: {
4243 unsigned Imm = N->getConstantOperandVal(3);
4244 if (!isUInt<14>(Imm)) {
4245 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4246 return;
4247 }
4248 SDValue CSRWRResults =
4249 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4250 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4251 DAG.getConstant(Imm, DL, GRLenVT)});
4252 Results.push_back(
4253 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4254 Results.push_back(CSRWRResults.getValue(1));
4255 break;
4256 }
4257 case Intrinsic::loongarch_csrxchg_w: {
4258 unsigned Imm = N->getConstantOperandVal(4);
4259 if (!isUInt<14>(Imm)) {
4260 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4261 return;
4262 }
4263 SDValue CSRXCHGResults = DAG.getNode(
4264 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4265 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4266 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4267 DAG.getConstant(Imm, DL, GRLenVT)});
4268 Results.push_back(
4269 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4270 Results.push_back(CSRXCHGResults.getValue(1));
4271 break;
4272 }
4273 #define IOCSRRD_CASE(NAME, NODE) \
4274 case Intrinsic::loongarch_##NAME: { \
4275 SDValue IOCSRRDResults = \
4276 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4277 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4278 Results.push_back( \
4279 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4280 Results.push_back(IOCSRRDResults.getValue(1)); \
4281 break; \
4282 }
4283 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4284 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4285 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4286 #undef IOCSRRD_CASE
4287 case Intrinsic::loongarch_cpucfg: {
4288 SDValue CPUCFGResults =
4289 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4290 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4291 Results.push_back(
4292 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4293 Results.push_back(CPUCFGResults.getValue(1));
4294 break;
4295 }
4296 case Intrinsic::loongarch_lddir_d: {
4297 if (!Subtarget.is64Bit()) {
4298 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4299 return;
4300 }
4301 break;
4302 }
4303 }
4304 break;
4305 }
4306 case ISD::READ_REGISTER: {
4307 if (Subtarget.is64Bit())
4308 DAG.getContext()->emitError(
4309 "On LA64, only 64-bit registers can be read.");
4310 else
4311 DAG.getContext()->emitError(
4312 "On LA32, only 32-bit registers can be read.");
4313 Results.push_back(DAG.getUNDEF(VT));
4314 Results.push_back(N->getOperand(0));
4315 break;
4316 }
4317 case ISD::INTRINSIC_WO_CHAIN: {
4318 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4319 break;
4320 }
4321 case ISD::LROUND: {
4322 SDValue Op0 = N->getOperand(0);
4323 EVT OpVT = Op0.getValueType();
4324 RTLIB::Libcall LC =
4325 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4326 MakeLibCallOptions CallOptions;
4327 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
4328 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4329 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4330 Results.push_back(Result);
4331 break;
4332 }
4333 case ISD::ATOMIC_CMP_SWAP: {
4334 replaceCMP_XCHG_128Results(N, Results, DAG);
4335 break;
4336 }
4337 case ISD::TRUNCATE: {
4338 MVT VT = N->getSimpleValueType(0);
4339 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4340 return;
4341
4342 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4343 SDValue In = N->getOperand(0);
4344 EVT InVT = In.getValueType();
4345 EVT InEltVT = InVT.getVectorElementType();
4346 EVT EltVT = VT.getVectorElementType();
4347 unsigned MinElts = VT.getVectorNumElements();
4348 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4349 unsigned InBits = InVT.getSizeInBits();
4350
4351 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4352 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4353 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4354 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4355 for (unsigned I = 0; I < MinElts; ++I)
4356 TruncMask[I] = Scale * I;
4357
4358 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4359 MVT SVT = In.getSimpleValueType().getScalarType();
4360 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4361 SDValue WidenIn =
4362 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4363 DAG.getVectorIdxConstant(0, DL));
4364 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4365 "Illegal vector type in truncation");
4366 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4367 Results.push_back(
4368 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4369 return;
4370 }
4371 }
4372
4373 break;
4374 }
4375 }
4376 }
4377
performANDCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4378 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4379 TargetLowering::DAGCombinerInfo &DCI,
4380 const LoongArchSubtarget &Subtarget) {
4381 if (DCI.isBeforeLegalizeOps())
4382 return SDValue();
4383
4384 SDValue FirstOperand = N->getOperand(0);
4385 SDValue SecondOperand = N->getOperand(1);
4386 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4387 EVT ValTy = N->getValueType(0);
4388 SDLoc DL(N);
4389 uint64_t lsb, msb;
4390 unsigned SMIdx, SMLen;
4391 ConstantSDNode *CN;
4392 SDValue NewOperand;
4393 MVT GRLenVT = Subtarget.getGRLenVT();
4394
4395 // BSTRPICK requires the 32S feature.
4396 if (!Subtarget.has32S())
4397 return SDValue();
4398
4399 // Op's second operand must be a shifted mask.
4400 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4401 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4402 return SDValue();
4403
4404 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4405 // Pattern match BSTRPICK.
4406 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4407 // => BSTRPICK $dst, $src, msb, lsb
4408 // where msb = lsb + len - 1
4409
4410 // The second operand of the shift must be an immediate.
4411 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4412 return SDValue();
4413
4414 lsb = CN->getZExtValue();
4415
4416 // Return if the shifted mask does not start at bit 0 or the sum of its
4417 // length and lsb exceeds the word's size.
4418 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4419 return SDValue();
4420
4421 NewOperand = FirstOperand.getOperand(0);
4422 } else {
4423 // Pattern match BSTRPICK.
4424 // $dst = and $src, (2**len- 1) , if len > 12
4425 // => BSTRPICK $dst, $src, msb, lsb
4426 // where lsb = 0 and msb = len - 1
4427
4428 // If the mask is <= 0xfff, andi can be used instead.
4429 if (CN->getZExtValue() <= 0xfff)
4430 return SDValue();
4431
4432 // Return if the MSB exceeds.
4433 if (SMIdx + SMLen > ValTy.getSizeInBits())
4434 return SDValue();
4435
4436 if (SMIdx > 0) {
4437 // Omit if the constant has more than 2 uses. This a conservative
4438 // decision. Whether it is a win depends on the HW microarchitecture.
4439 // However it should always be better for 1 and 2 uses.
4440 if (CN->use_size() > 2)
4441 return SDValue();
4442 // Return if the constant can be composed by a single LU12I.W.
4443 if ((CN->getZExtValue() & 0xfff) == 0)
4444 return SDValue();
4445 // Return if the constand can be composed by a single ADDI with
4446 // the zero register.
4447 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4448 return SDValue();
4449 }
4450
4451 lsb = SMIdx;
4452 NewOperand = FirstOperand;
4453 }
4454
4455 msb = lsb + SMLen - 1;
4456 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4457 DAG.getConstant(msb, DL, GRLenVT),
4458 DAG.getConstant(lsb, DL, GRLenVT));
4459 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4460 return NR0;
4461 // Try to optimize to
4462 // bstrpick $Rd, $Rs, msb, lsb
4463 // slli $Rd, $Rd, lsb
4464 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4465 DAG.getConstant(lsb, DL, GRLenVT));
4466 }
4467
performSRLCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4468 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4469 TargetLowering::DAGCombinerInfo &DCI,
4470 const LoongArchSubtarget &Subtarget) {
4471 // BSTRPICK requires the 32S feature.
4472 if (!Subtarget.has32S())
4473 return SDValue();
4474
4475 if (DCI.isBeforeLegalizeOps())
4476 return SDValue();
4477
4478 // $dst = srl (and $src, Mask), Shamt
4479 // =>
4480 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4481 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4482 //
4483
4484 SDValue FirstOperand = N->getOperand(0);
4485 ConstantSDNode *CN;
4486 EVT ValTy = N->getValueType(0);
4487 SDLoc DL(N);
4488 MVT GRLenVT = Subtarget.getGRLenVT();
4489 unsigned MaskIdx, MaskLen;
4490 uint64_t Shamt;
4491
4492 // The first operand must be an AND and the second operand of the AND must be
4493 // a shifted mask.
4494 if (FirstOperand.getOpcode() != ISD::AND ||
4495 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4496 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4497 return SDValue();
4498
4499 // The second operand (shift amount) must be an immediate.
4500 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4501 return SDValue();
4502
4503 Shamt = CN->getZExtValue();
4504 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4505 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4506 FirstOperand->getOperand(0),
4507 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4508 DAG.getConstant(Shamt, DL, GRLenVT));
4509
4510 return SDValue();
4511 }
4512
4513 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
4514 // Allows BITCASTCombine to determine what size vector generated a <X x i1>.
checkBitcastSrcVectorSize(SDValue Src,unsigned Size,unsigned Depth)4515 static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4516 unsigned Depth) {
4517 // Limit recursion.
4518 if (Depth >= SelectionDAG::MaxRecursionDepth)
4519 return false;
4520 switch (Src.getOpcode()) {
4521 case ISD::SETCC:
4522 case ISD::TRUNCATE:
4523 return Src.getOperand(0).getValueSizeInBits() == Size;
4524 case ISD::FREEZE:
4525 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4526 case ISD::AND:
4527 case ISD::XOR:
4528 case ISD::OR:
4529 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4530 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4531 case ISD::SELECT:
4532 case ISD::VSELECT:
4533 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4534 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4535 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4536 case ISD::BUILD_VECTOR:
4537 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4538 ISD::isBuildVectorAllOnes(Src.getNode());
4539 }
4540 return false;
4541 }
4542
4543 // Helper to push sign extension of vXi1 SETCC result through bitops.
signExtendBitcastSrcVector(SelectionDAG & DAG,EVT SExtVT,SDValue Src,const SDLoc & DL)4544 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4545 SDValue Src, const SDLoc &DL) {
4546 switch (Src.getOpcode()) {
4547 case ISD::SETCC:
4548 case ISD::FREEZE:
4549 case ISD::TRUNCATE:
4550 case ISD::BUILD_VECTOR:
4551 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4552 case ISD::AND:
4553 case ISD::XOR:
4554 case ISD::OR:
4555 return DAG.getNode(
4556 Src.getOpcode(), DL, SExtVT,
4557 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4558 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4559 case ISD::SELECT:
4560 case ISD::VSELECT:
4561 return DAG.getSelect(
4562 DL, SExtVT, Src.getOperand(0),
4563 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4564 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4565 }
4566 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4567 }
4568
4569 static SDValue
performSETCC_BITCASTCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4570 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4571 TargetLowering::DAGCombinerInfo &DCI,
4572 const LoongArchSubtarget &Subtarget) {
4573 SDLoc DL(N);
4574 EVT VT = N->getValueType(0);
4575 SDValue Src = N->getOperand(0);
4576 EVT SrcVT = Src.getValueType();
4577
4578 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4579 return SDValue();
4580
4581 bool UseLASX;
4582 unsigned Opc = ISD::DELETED_NODE;
4583 EVT CmpVT = Src.getOperand(0).getValueType();
4584 EVT EltVT = CmpVT.getVectorElementType();
4585
4586 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4587 UseLASX = false;
4588 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4589 CmpVT.getSizeInBits() == 256)
4590 UseLASX = true;
4591 else
4592 return SDValue();
4593
4594 SDValue SrcN1 = Src.getOperand(1);
4595 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4596 default:
4597 break;
4598 case ISD::SETEQ:
4599 // x == 0 => not (vmsknez.b x)
4600 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4601 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4602 break;
4603 case ISD::SETGT:
4604 // x > -1 => vmskgez.b x
4605 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4606 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4607 break;
4608 case ISD::SETGE:
4609 // x >= 0 => vmskgez.b x
4610 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4611 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4612 break;
4613 case ISD::SETLT:
4614 // x < 0 => vmskltz.{b,h,w,d} x
4615 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4616 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4617 EltVT == MVT::i64))
4618 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4619 break;
4620 case ISD::SETLE:
4621 // x <= -1 => vmskltz.{b,h,w,d} x
4622 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4623 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4624 EltVT == MVT::i64))
4625 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4626 break;
4627 case ISD::SETNE:
4628 // x != 0 => vmsknez.b x
4629 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4630 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4631 break;
4632 }
4633
4634 if (Opc == ISD::DELETED_NODE)
4635 return SDValue();
4636
4637 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4638 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4639 V = DAG.getZExtOrTrunc(V, DL, T);
4640 return DAG.getBitcast(VT, V);
4641 }
4642
performBITCASTCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4643 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4644 TargetLowering::DAGCombinerInfo &DCI,
4645 const LoongArchSubtarget &Subtarget) {
4646 SDLoc DL(N);
4647 EVT VT = N->getValueType(0);
4648 SDValue Src = N->getOperand(0);
4649 EVT SrcVT = Src.getValueType();
4650
4651 if (!DCI.isBeforeLegalizeOps())
4652 return SDValue();
4653
4654 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4655 return SDValue();
4656
4657 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4658 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4659 if (Res)
4660 return Res;
4661
4662 // Generate vXi1 using [X]VMSKLTZ
4663 MVT SExtVT;
4664 unsigned Opc;
4665 bool UseLASX = false;
4666 bool PropagateSExt = false;
4667
4668 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4669 EVT CmpVT = Src.getOperand(0).getValueType();
4670 if (CmpVT.getSizeInBits() > 256)
4671 return SDValue();
4672 }
4673
4674 switch (SrcVT.getSimpleVT().SimpleTy) {
4675 default:
4676 return SDValue();
4677 case MVT::v2i1:
4678 SExtVT = MVT::v2i64;
4679 break;
4680 case MVT::v4i1:
4681 SExtVT = MVT::v4i32;
4682 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4683 SExtVT = MVT::v4i64;
4684 UseLASX = true;
4685 PropagateSExt = true;
4686 }
4687 break;
4688 case MVT::v8i1:
4689 SExtVT = MVT::v8i16;
4690 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4691 SExtVT = MVT::v8i32;
4692 UseLASX = true;
4693 PropagateSExt = true;
4694 }
4695 break;
4696 case MVT::v16i1:
4697 SExtVT = MVT::v16i8;
4698 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4699 SExtVT = MVT::v16i16;
4700 UseLASX = true;
4701 PropagateSExt = true;
4702 }
4703 break;
4704 case MVT::v32i1:
4705 SExtVT = MVT::v32i8;
4706 UseLASX = true;
4707 break;
4708 };
4709 if (UseLASX && !(Subtarget.has32S() && Subtarget.hasExtLASX()))
4710 return SDValue();
4711 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
4712 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4713 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4714
4715 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src);
4716 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4717 V = DAG.getZExtOrTrunc(V, DL, T);
4718 return DAG.getBitcast(VT, V);
4719 }
4720
performORCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)4721 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
4722 TargetLowering::DAGCombinerInfo &DCI,
4723 const LoongArchSubtarget &Subtarget) {
4724 MVT GRLenVT = Subtarget.getGRLenVT();
4725 EVT ValTy = N->getValueType(0);
4726 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4727 ConstantSDNode *CN0, *CN1;
4728 SDLoc DL(N);
4729 unsigned ValBits = ValTy.getSizeInBits();
4730 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
4731 unsigned Shamt;
4732 bool SwapAndRetried = false;
4733
4734 // BSTRPICK requires the 32S feature.
4735 if (!Subtarget.has32S())
4736 return SDValue();
4737
4738 if (DCI.isBeforeLegalizeOps())
4739 return SDValue();
4740
4741 if (ValBits != 32 && ValBits != 64)
4742 return SDValue();
4743
4744 Retry:
4745 // 1st pattern to match BSTRINS:
4746 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
4747 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
4748 // =>
4749 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4750 if (N0.getOpcode() == ISD::AND &&
4751 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4752 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4753 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
4754 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4755 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4756 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
4757 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4758 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4759 (MaskIdx0 + MaskLen0 <= ValBits)) {
4760 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
4761 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4762 N1.getOperand(0).getOperand(0),
4763 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
4764 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4765 }
4766
4767 // 2nd pattern to match BSTRINS:
4768 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
4769 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
4770 // =>
4771 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4772 if (N0.getOpcode() == ISD::AND &&
4773 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4774 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4775 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
4776 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4777 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4778 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4779 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4780 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
4781 (MaskIdx0 + MaskLen0 <= ValBits)) {
4782 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
4783 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4784 N1.getOperand(0).getOperand(0),
4785 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
4786 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4787 }
4788
4789 // 3rd pattern to match BSTRINS:
4790 // R = or (and X, mask0), (and Y, mask1)
4791 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
4792 // =>
4793 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
4794 // where msb = lsb + size - 1
4795 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4796 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4797 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4798 (MaskIdx0 + MaskLen0 <= 64) &&
4799 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
4800 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4801 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
4802 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4803 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
4804 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
4805 DAG.getConstant(ValBits == 32
4806 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4807 : (MaskIdx0 + MaskLen0 - 1),
4808 DL, GRLenVT),
4809 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4810 }
4811
4812 // 4th pattern to match BSTRINS:
4813 // R = or (and X, mask), (shl Y, shamt)
4814 // where mask = (2**shamt - 1)
4815 // =>
4816 // R = BSTRINS X, Y, ValBits - 1, shamt
4817 // where ValBits = 32 or 64
4818 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
4819 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4820 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
4821 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4822 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
4823 (MaskIdx0 + MaskLen0 <= ValBits)) {
4824 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
4825 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4826 N1.getOperand(0),
4827 DAG.getConstant((ValBits - 1), DL, GRLenVT),
4828 DAG.getConstant(Shamt, DL, GRLenVT));
4829 }
4830
4831 // 5th pattern to match BSTRINS:
4832 // R = or (and X, mask), const
4833 // where ~mask = (2**size - 1) << lsb, mask & const = 0
4834 // =>
4835 // R = BSTRINS X, (const >> lsb), msb, lsb
4836 // where msb = lsb + size - 1
4837 if (N0.getOpcode() == ISD::AND &&
4838 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4839 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4840 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
4841 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4842 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
4843 return DAG.getNode(
4844 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4845 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
4846 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4847 : (MaskIdx0 + MaskLen0 - 1),
4848 DL, GRLenVT),
4849 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4850 }
4851
4852 // 6th pattern.
4853 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
4854 // by the incoming bits are known to be zero.
4855 // =>
4856 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
4857 //
4858 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
4859 // pattern is more common than the 1st. So we put the 1st before the 6th in
4860 // order to match as many nodes as possible.
4861 ConstantSDNode *CNMask, *CNShamt;
4862 unsigned MaskIdx, MaskLen;
4863 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
4864 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4865 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
4866 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4867 CNShamt->getZExtValue() + MaskLen <= ValBits) {
4868 Shamt = CNShamt->getZExtValue();
4869 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
4870 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
4871 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
4872 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
4873 N1.getOperand(0).getOperand(0),
4874 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
4875 DAG.getConstant(Shamt, DL, GRLenVT));
4876 }
4877 }
4878
4879 // 7th pattern.
4880 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
4881 // overwritten by the incoming bits are known to be zero.
4882 // =>
4883 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
4884 //
4885 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
4886 // before the 7th in order to match as many nodes as possible.
4887 if (N1.getOpcode() == ISD::AND &&
4888 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4889 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
4890 N1.getOperand(0).getOpcode() == ISD::SHL &&
4891 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4892 CNShamt->getZExtValue() == MaskIdx) {
4893 APInt ShMask(ValBits, CNMask->getZExtValue());
4894 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
4895 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
4896 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
4897 N1.getOperand(0).getOperand(0),
4898 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4899 DAG.getConstant(MaskIdx, DL, GRLenVT));
4900 }
4901 }
4902
4903 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
4904 if (!SwapAndRetried) {
4905 std::swap(N0, N1);
4906 SwapAndRetried = true;
4907 goto Retry;
4908 }
4909
4910 SwapAndRetried = false;
4911 Retry2:
4912 // 8th pattern.
4913 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
4914 // the incoming bits are known to be zero.
4915 // =>
4916 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
4917 //
4918 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
4919 // we put it here in order to match as many nodes as possible or generate less
4920 // instructions.
4921 if (N1.getOpcode() == ISD::AND &&
4922 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4923 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
4924 APInt ShMask(ValBits, CNMask->getZExtValue());
4925 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
4926 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
4927 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
4928 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
4929 N1->getOperand(0),
4930 DAG.getConstant(MaskIdx, DL, GRLenVT)),
4931 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4932 DAG.getConstant(MaskIdx, DL, GRLenVT));
4933 }
4934 }
4935 // Swap N0/N1 and retry.
4936 if (!SwapAndRetried) {
4937 std::swap(N0, N1);
4938 SwapAndRetried = true;
4939 goto Retry2;
4940 }
4941
4942 return SDValue();
4943 }
4944
checkValueWidth(SDValue V,ISD::LoadExtType & ExtType)4945 static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
4946 ExtType = ISD::NON_EXTLOAD;
4947
4948 switch (V.getNode()->getOpcode()) {
4949 case ISD::LOAD: {
4950 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
4951 if ((LoadNode->getMemoryVT() == MVT::i8) ||
4952 (LoadNode->getMemoryVT() == MVT::i16)) {
4953 ExtType = LoadNode->getExtensionType();
4954 return true;
4955 }
4956 return false;
4957 }
4958 case ISD::AssertSext: {
4959 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
4960 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
4961 ExtType = ISD::SEXTLOAD;
4962 return true;
4963 }
4964 return false;
4965 }
4966 case ISD::AssertZext: {
4967 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
4968 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
4969 ExtType = ISD::ZEXTLOAD;
4970 return true;
4971 }
4972 return false;
4973 }
4974 default:
4975 return false;
4976 }
4977
4978 return false;
4979 }
4980
4981 // Eliminate redundant truncation and zero-extension nodes.
4982 // * Case 1:
4983 // +------------+ +------------+ +------------+
4984 // | Input1 | | Input2 | | CC |
4985 // +------------+ +------------+ +------------+
4986 // | | |
4987 // V V +----+
4988 // +------------+ +------------+ |
4989 // | TRUNCATE | | TRUNCATE | |
4990 // +------------+ +------------+ |
4991 // | | |
4992 // V V |
4993 // +------------+ +------------+ |
4994 // | ZERO_EXT | | ZERO_EXT | |
4995 // +------------+ +------------+ |
4996 // | | |
4997 // | +-------------+ |
4998 // V V | |
4999 // +----------------+ | |
5000 // | AND | | |
5001 // +----------------+ | |
5002 // | | |
5003 // +---------------+ | |
5004 // | | |
5005 // V V V
5006 // +-------------+
5007 // | CMP |
5008 // +-------------+
5009 // * Case 2:
5010 // +------------+ +------------+ +-------------+ +------------+ +------------+
5011 // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5012 // +------------+ +------------+ +-------------+ +------------+ +------------+
5013 // | | | | |
5014 // V | | | |
5015 // +------------+ | | | |
5016 // | XOR |<---------------------+ | |
5017 // +------------+ | | |
5018 // | | | |
5019 // V V +---------------+ |
5020 // +------------+ +------------+ | |
5021 // | TRUNCATE | | TRUNCATE | | +-------------------------+
5022 // +------------+ +------------+ | |
5023 // | | | |
5024 // V V | |
5025 // +------------+ +------------+ | |
5026 // | ZERO_EXT | | ZERO_EXT | | |
5027 // +------------+ +------------+ | |
5028 // | | | |
5029 // V V | |
5030 // +----------------+ | |
5031 // | AND | | |
5032 // +----------------+ | |
5033 // | | |
5034 // +---------------+ | |
5035 // | | |
5036 // V V V
5037 // +-------------+
5038 // | CMP |
5039 // +-------------+
// Combine for SETCC nodes matching one of the two shapes drawn above.
//
// Writing a' = zext(trunc(Input1)) and b' = zext(trunc(Input2)):
//   * Case 1: setcc (and a', b'), b', CC
//   * Case 2: setcc (and (xor a', -1), b'), 0, CC   with CC in {SETEQ, SETNE}
// Both are rewritten to
//   setcc (and Input1, Input2), Input2, CC
// (for case 2 this relies on (~a & b) == 0  <=>  (a & b) == b).
//
// The rewrite only happens when both inputs pass checkValueWidth, share one
// value type that also equals the AND's type, and the extension-kind check
// below succeeds. Returns an empty SDValue when nothing was changed; otherwise
// all uses of N are replaced and N itself is returned. DCI and Subtarget are
// not used by this function.
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();

  // The left-hand side of the comparison must be an AND.
  SDNode *AndNode = N->getOperand(0).getNode();
  if (AndNode->getOpcode() != ISD::AND)
    return SDValue();

  // The second AND operand must be a zero-extension in both cases.
  SDValue AndInputValue2 = AndNode->getOperand(1);
  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue CmpInputValue = N->getOperand(1);
  SDValue AndInputValue1 = AndNode->getOperand(0);
  if (AndInputValue1.getOpcode() == ISD::XOR) {
    // Case 2: (and (xor X, -1), Y) compared for (in)equality against 0.
    if (CC != ISD::SETEQ && CC != ISD::SETNE)
      return SDValue();
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
    if (!CN || CN->getSExtValue() != -1)
      return SDValue();
    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
    if (!CN || CN->getSExtValue() != 0)
      return SDValue();
    // Look through the XOR; what it inverts must itself be a zero-extension.
    AndInputValue1 = AndInputValue1.getOperand(0);
    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
    // Case 1: the AND result is compared against its own second operand.
    if (AndInputValue2 != CmpInputValue)
      return SDValue();
  } else {
    return SDValue();
  }

  // Each zero-extension must wrap a TRUNCATE.
  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  // The values feeding the truncations ("Input1"/"Input2" in the diagrams).
  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
  ISD::LoadExtType ExtType1;
  ISD::LoadExtType ExtType2;

  // Both inputs must be i8/i16 loads or i8/i16 AssertSext/AssertZext nodes.
  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
      !checkValueWidth(TruncInputValue2, ExtType2))
    return SDValue();

  // The new AND reuses the old AND's type, so both inputs must already have
  // exactly that type.
  if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
      AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
    return SDValue();

  // Proceed only if input 2 is zero-extended, or input 2 is sign-extended, or
  // input 1 is sign-extended; give up on every other combination.
  if ((ExtType2 != ISD::ZEXTLOAD) &&
      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
    return SDValue();

  // These truncation and zero-extension nodes are not necessary, remove them.
  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
                               TruncInputValue1, TruncInputValue2);
  SDValue NewSetCC =
      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
  // The replacement is done here directly; N (not the new node) is returned.
  // NOTE(review): presumably this tells the combiner the node was already
  // handled in place -- confirm against the DAGCombiner conventions.
  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
  return SDValue(N, 0);
}
5107
5108 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
performBITREV_WCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5109 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5110 TargetLowering::DAGCombinerInfo &DCI,
5111 const LoongArchSubtarget &Subtarget) {
5112 if (DCI.isBeforeLegalizeOps())
5113 return SDValue();
5114
5115 SDValue Src = N->getOperand(0);
5116 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5117 return SDValue();
5118
5119 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5120 Src.getOperand(0));
5121 }
5122
5123 template <unsigned N>
legalizeIntrinsicImmArg(SDNode * Node,unsigned ImmOp,SelectionDAG & DAG,const LoongArchSubtarget & Subtarget,bool IsSigned=false)5124 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5125 SelectionDAG &DAG,
5126 const LoongArchSubtarget &Subtarget,
5127 bool IsSigned = false) {
5128 SDLoc DL(Node);
5129 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5130 // Check the ImmArg.
5131 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5132 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5133 DAG.getContext()->emitError(Node->getOperationName(0) +
5134 ": argument out of range.");
5135 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5136 }
5137 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5138 }
5139
5140 template <unsigned N>
lowerVectorSplatImm(SDNode * Node,unsigned ImmOp,SelectionDAG & DAG,bool IsSigned=false)5141 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5142 SelectionDAG &DAG, bool IsSigned = false) {
5143 SDLoc DL(Node);
5144 EVT ResTy = Node->getValueType(0);
5145 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5146
5147 // Check the ImmArg.
5148 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5149 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5150 DAG.getContext()->emitError(Node->getOperationName(0) +
5151 ": argument out of range.");
5152 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5153 }
5154 return DAG.getConstant(
5155 APInt(ResTy.getScalarType().getSizeInBits(),
5156 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5157 DL, ResTy);
5158 }
5159
truncateVecElts(SDNode * Node,SelectionDAG & DAG)5160 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5161 SDLoc DL(Node);
5162 EVT ResTy = Node->getValueType(0);
5163 SDValue Vec = Node->getOperand(2);
5164 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5165 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5166 }
5167
lowerVectorBitClear(SDNode * Node,SelectionDAG & DAG)5168 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5169 SDLoc DL(Node);
5170 EVT ResTy = Node->getValueType(0);
5171 SDValue One = DAG.getConstant(1, DL, ResTy);
5172 SDValue Bit =
5173 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5174
5175 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5176 DAG.getNOT(DL, Bit, ResTy));
5177 }
5178
5179 template <unsigned N>
lowerVectorBitClearImm(SDNode * Node,SelectionDAG & DAG)5180 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5181 SDLoc DL(Node);
5182 EVT ResTy = Node->getValueType(0);
5183 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5184 // Check the unsigned ImmArg.
5185 if (!isUInt<N>(CImm->getZExtValue())) {
5186 DAG.getContext()->emitError(Node->getOperationName(0) +
5187 ": argument out of range.");
5188 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5189 }
5190
5191 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5192 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5193
5194 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5195 }
5196
5197 template <unsigned N>
lowerVectorBitSetImm(SDNode * Node,SelectionDAG & DAG)5198 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5199 SDLoc DL(Node);
5200 EVT ResTy = Node->getValueType(0);
5201 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5202 // Check the unsigned ImmArg.
5203 if (!isUInt<N>(CImm->getZExtValue())) {
5204 DAG.getContext()->emitError(Node->getOperationName(0) +
5205 ": argument out of range.");
5206 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5207 }
5208
5209 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5210 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5211 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5212 }
5213
5214 template <unsigned N>
lowerVectorBitRevImm(SDNode * Node,SelectionDAG & DAG)5215 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5216 SDLoc DL(Node);
5217 EVT ResTy = Node->getValueType(0);
5218 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5219 // Check the unsigned ImmArg.
5220 if (!isUInt<N>(CImm->getZExtValue())) {
5221 DAG.getContext()->emitError(Node->getOperationName(0) +
5222 ": argument out of range.");
5223 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5224 }
5225
5226 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5227 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5228 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5229 }
5230
5231 static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5232 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5233 TargetLowering::DAGCombinerInfo &DCI,
5234 const LoongArchSubtarget &Subtarget) {
5235 SDLoc DL(N);
5236 switch (N->getConstantOperandVal(0)) {
5237 default:
5238 break;
5239 case Intrinsic::loongarch_lsx_vadd_b:
5240 case Intrinsic::loongarch_lsx_vadd_h:
5241 case Intrinsic::loongarch_lsx_vadd_w:
5242 case Intrinsic::loongarch_lsx_vadd_d:
5243 case Intrinsic::loongarch_lasx_xvadd_b:
5244 case Intrinsic::loongarch_lasx_xvadd_h:
5245 case Intrinsic::loongarch_lasx_xvadd_w:
5246 case Intrinsic::loongarch_lasx_xvadd_d:
5247 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5248 N->getOperand(2));
5249 case Intrinsic::loongarch_lsx_vaddi_bu:
5250 case Intrinsic::loongarch_lsx_vaddi_hu:
5251 case Intrinsic::loongarch_lsx_vaddi_wu:
5252 case Intrinsic::loongarch_lsx_vaddi_du:
5253 case Intrinsic::loongarch_lasx_xvaddi_bu:
5254 case Intrinsic::loongarch_lasx_xvaddi_hu:
5255 case Intrinsic::loongarch_lasx_xvaddi_wu:
5256 case Intrinsic::loongarch_lasx_xvaddi_du:
5257 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5258 lowerVectorSplatImm<5>(N, 2, DAG));
5259 case Intrinsic::loongarch_lsx_vsub_b:
5260 case Intrinsic::loongarch_lsx_vsub_h:
5261 case Intrinsic::loongarch_lsx_vsub_w:
5262 case Intrinsic::loongarch_lsx_vsub_d:
5263 case Intrinsic::loongarch_lasx_xvsub_b:
5264 case Intrinsic::loongarch_lasx_xvsub_h:
5265 case Intrinsic::loongarch_lasx_xvsub_w:
5266 case Intrinsic::loongarch_lasx_xvsub_d:
5267 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5268 N->getOperand(2));
5269 case Intrinsic::loongarch_lsx_vsubi_bu:
5270 case Intrinsic::loongarch_lsx_vsubi_hu:
5271 case Intrinsic::loongarch_lsx_vsubi_wu:
5272 case Intrinsic::loongarch_lsx_vsubi_du:
5273 case Intrinsic::loongarch_lasx_xvsubi_bu:
5274 case Intrinsic::loongarch_lasx_xvsubi_hu:
5275 case Intrinsic::loongarch_lasx_xvsubi_wu:
5276 case Intrinsic::loongarch_lasx_xvsubi_du:
5277 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5278 lowerVectorSplatImm<5>(N, 2, DAG));
5279 case Intrinsic::loongarch_lsx_vneg_b:
5280 case Intrinsic::loongarch_lsx_vneg_h:
5281 case Intrinsic::loongarch_lsx_vneg_w:
5282 case Intrinsic::loongarch_lsx_vneg_d:
5283 case Intrinsic::loongarch_lasx_xvneg_b:
5284 case Intrinsic::loongarch_lasx_xvneg_h:
5285 case Intrinsic::loongarch_lasx_xvneg_w:
5286 case Intrinsic::loongarch_lasx_xvneg_d:
5287 return DAG.getNode(
5288 ISD::SUB, DL, N->getValueType(0),
5289 DAG.getConstant(
5290 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5291 /*isSigned=*/true),
5292 SDLoc(N), N->getValueType(0)),
5293 N->getOperand(1));
5294 case Intrinsic::loongarch_lsx_vmax_b:
5295 case Intrinsic::loongarch_lsx_vmax_h:
5296 case Intrinsic::loongarch_lsx_vmax_w:
5297 case Intrinsic::loongarch_lsx_vmax_d:
5298 case Intrinsic::loongarch_lasx_xvmax_b:
5299 case Intrinsic::loongarch_lasx_xvmax_h:
5300 case Intrinsic::loongarch_lasx_xvmax_w:
5301 case Intrinsic::loongarch_lasx_xvmax_d:
5302 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5303 N->getOperand(2));
5304 case Intrinsic::loongarch_lsx_vmax_bu:
5305 case Intrinsic::loongarch_lsx_vmax_hu:
5306 case Intrinsic::loongarch_lsx_vmax_wu:
5307 case Intrinsic::loongarch_lsx_vmax_du:
5308 case Intrinsic::loongarch_lasx_xvmax_bu:
5309 case Intrinsic::loongarch_lasx_xvmax_hu:
5310 case Intrinsic::loongarch_lasx_xvmax_wu:
5311 case Intrinsic::loongarch_lasx_xvmax_du:
5312 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5313 N->getOperand(2));
5314 case Intrinsic::loongarch_lsx_vmaxi_b:
5315 case Intrinsic::loongarch_lsx_vmaxi_h:
5316 case Intrinsic::loongarch_lsx_vmaxi_w:
5317 case Intrinsic::loongarch_lsx_vmaxi_d:
5318 case Intrinsic::loongarch_lasx_xvmaxi_b:
5319 case Intrinsic::loongarch_lasx_xvmaxi_h:
5320 case Intrinsic::loongarch_lasx_xvmaxi_w:
5321 case Intrinsic::loongarch_lasx_xvmaxi_d:
5322 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5323 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5324 case Intrinsic::loongarch_lsx_vmaxi_bu:
5325 case Intrinsic::loongarch_lsx_vmaxi_hu:
5326 case Intrinsic::loongarch_lsx_vmaxi_wu:
5327 case Intrinsic::loongarch_lsx_vmaxi_du:
5328 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5329 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5330 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5331 case Intrinsic::loongarch_lasx_xvmaxi_du:
5332 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5333 lowerVectorSplatImm<5>(N, 2, DAG));
5334 case Intrinsic::loongarch_lsx_vmin_b:
5335 case Intrinsic::loongarch_lsx_vmin_h:
5336 case Intrinsic::loongarch_lsx_vmin_w:
5337 case Intrinsic::loongarch_lsx_vmin_d:
5338 case Intrinsic::loongarch_lasx_xvmin_b:
5339 case Intrinsic::loongarch_lasx_xvmin_h:
5340 case Intrinsic::loongarch_lasx_xvmin_w:
5341 case Intrinsic::loongarch_lasx_xvmin_d:
5342 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5343 N->getOperand(2));
5344 case Intrinsic::loongarch_lsx_vmin_bu:
5345 case Intrinsic::loongarch_lsx_vmin_hu:
5346 case Intrinsic::loongarch_lsx_vmin_wu:
5347 case Intrinsic::loongarch_lsx_vmin_du:
5348 case Intrinsic::loongarch_lasx_xvmin_bu:
5349 case Intrinsic::loongarch_lasx_xvmin_hu:
5350 case Intrinsic::loongarch_lasx_xvmin_wu:
5351 case Intrinsic::loongarch_lasx_xvmin_du:
5352 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5353 N->getOperand(2));
5354 case Intrinsic::loongarch_lsx_vmini_b:
5355 case Intrinsic::loongarch_lsx_vmini_h:
5356 case Intrinsic::loongarch_lsx_vmini_w:
5357 case Intrinsic::loongarch_lsx_vmini_d:
5358 case Intrinsic::loongarch_lasx_xvmini_b:
5359 case Intrinsic::loongarch_lasx_xvmini_h:
5360 case Intrinsic::loongarch_lasx_xvmini_w:
5361 case Intrinsic::loongarch_lasx_xvmini_d:
5362 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5363 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5364 case Intrinsic::loongarch_lsx_vmini_bu:
5365 case Intrinsic::loongarch_lsx_vmini_hu:
5366 case Intrinsic::loongarch_lsx_vmini_wu:
5367 case Intrinsic::loongarch_lsx_vmini_du:
5368 case Intrinsic::loongarch_lasx_xvmini_bu:
5369 case Intrinsic::loongarch_lasx_xvmini_hu:
5370 case Intrinsic::loongarch_lasx_xvmini_wu:
5371 case Intrinsic::loongarch_lasx_xvmini_du:
5372 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5373 lowerVectorSplatImm<5>(N, 2, DAG));
5374 case Intrinsic::loongarch_lsx_vmul_b:
5375 case Intrinsic::loongarch_lsx_vmul_h:
5376 case Intrinsic::loongarch_lsx_vmul_w:
5377 case Intrinsic::loongarch_lsx_vmul_d:
5378 case Intrinsic::loongarch_lasx_xvmul_b:
5379 case Intrinsic::loongarch_lasx_xvmul_h:
5380 case Intrinsic::loongarch_lasx_xvmul_w:
5381 case Intrinsic::loongarch_lasx_xvmul_d:
5382 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5383 N->getOperand(2));
5384 case Intrinsic::loongarch_lsx_vmadd_b:
5385 case Intrinsic::loongarch_lsx_vmadd_h:
5386 case Intrinsic::loongarch_lsx_vmadd_w:
5387 case Intrinsic::loongarch_lsx_vmadd_d:
5388 case Intrinsic::loongarch_lasx_xvmadd_b:
5389 case Intrinsic::loongarch_lasx_xvmadd_h:
5390 case Intrinsic::loongarch_lasx_xvmadd_w:
5391 case Intrinsic::loongarch_lasx_xvmadd_d: {
5392 EVT ResTy = N->getValueType(0);
5393 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5394 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5395 N->getOperand(3)));
5396 }
5397 case Intrinsic::loongarch_lsx_vmsub_b:
5398 case Intrinsic::loongarch_lsx_vmsub_h:
5399 case Intrinsic::loongarch_lsx_vmsub_w:
5400 case Intrinsic::loongarch_lsx_vmsub_d:
5401 case Intrinsic::loongarch_lasx_xvmsub_b:
5402 case Intrinsic::loongarch_lasx_xvmsub_h:
5403 case Intrinsic::loongarch_lasx_xvmsub_w:
5404 case Intrinsic::loongarch_lasx_xvmsub_d: {
5405 EVT ResTy = N->getValueType(0);
5406 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5407 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5408 N->getOperand(3)));
5409 }
5410 case Intrinsic::loongarch_lsx_vdiv_b:
5411 case Intrinsic::loongarch_lsx_vdiv_h:
5412 case Intrinsic::loongarch_lsx_vdiv_w:
5413 case Intrinsic::loongarch_lsx_vdiv_d:
5414 case Intrinsic::loongarch_lasx_xvdiv_b:
5415 case Intrinsic::loongarch_lasx_xvdiv_h:
5416 case Intrinsic::loongarch_lasx_xvdiv_w:
5417 case Intrinsic::loongarch_lasx_xvdiv_d:
5418 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5419 N->getOperand(2));
5420 case Intrinsic::loongarch_lsx_vdiv_bu:
5421 case Intrinsic::loongarch_lsx_vdiv_hu:
5422 case Intrinsic::loongarch_lsx_vdiv_wu:
5423 case Intrinsic::loongarch_lsx_vdiv_du:
5424 case Intrinsic::loongarch_lasx_xvdiv_bu:
5425 case Intrinsic::loongarch_lasx_xvdiv_hu:
5426 case Intrinsic::loongarch_lasx_xvdiv_wu:
5427 case Intrinsic::loongarch_lasx_xvdiv_du:
5428 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5429 N->getOperand(2));
5430 case Intrinsic::loongarch_lsx_vmod_b:
5431 case Intrinsic::loongarch_lsx_vmod_h:
5432 case Intrinsic::loongarch_lsx_vmod_w:
5433 case Intrinsic::loongarch_lsx_vmod_d:
5434 case Intrinsic::loongarch_lasx_xvmod_b:
5435 case Intrinsic::loongarch_lasx_xvmod_h:
5436 case Intrinsic::loongarch_lasx_xvmod_w:
5437 case Intrinsic::loongarch_lasx_xvmod_d:
5438 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5439 N->getOperand(2));
5440 case Intrinsic::loongarch_lsx_vmod_bu:
5441 case Intrinsic::loongarch_lsx_vmod_hu:
5442 case Intrinsic::loongarch_lsx_vmod_wu:
5443 case Intrinsic::loongarch_lsx_vmod_du:
5444 case Intrinsic::loongarch_lasx_xvmod_bu:
5445 case Intrinsic::loongarch_lasx_xvmod_hu:
5446 case Intrinsic::loongarch_lasx_xvmod_wu:
5447 case Intrinsic::loongarch_lasx_xvmod_du:
5448 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5449 N->getOperand(2));
5450 case Intrinsic::loongarch_lsx_vand_v:
5451 case Intrinsic::loongarch_lasx_xvand_v:
5452 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5453 N->getOperand(2));
5454 case Intrinsic::loongarch_lsx_vor_v:
5455 case Intrinsic::loongarch_lasx_xvor_v:
5456 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5457 N->getOperand(2));
5458 case Intrinsic::loongarch_lsx_vxor_v:
5459 case Intrinsic::loongarch_lasx_xvxor_v:
5460 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5461 N->getOperand(2));
5462 case Intrinsic::loongarch_lsx_vnor_v:
5463 case Intrinsic::loongarch_lasx_xvnor_v: {
5464 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5465 N->getOperand(2));
5466 return DAG.getNOT(DL, Res, Res->getValueType(0));
5467 }
5468 case Intrinsic::loongarch_lsx_vandi_b:
5469 case Intrinsic::loongarch_lasx_xvandi_b:
5470 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5471 lowerVectorSplatImm<8>(N, 2, DAG));
5472 case Intrinsic::loongarch_lsx_vori_b:
5473 case Intrinsic::loongarch_lasx_xvori_b:
5474 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5475 lowerVectorSplatImm<8>(N, 2, DAG));
5476 case Intrinsic::loongarch_lsx_vxori_b:
5477 case Intrinsic::loongarch_lasx_xvxori_b:
5478 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5479 lowerVectorSplatImm<8>(N, 2, DAG));
5480 case Intrinsic::loongarch_lsx_vsll_b:
5481 case Intrinsic::loongarch_lsx_vsll_h:
5482 case Intrinsic::loongarch_lsx_vsll_w:
5483 case Intrinsic::loongarch_lsx_vsll_d:
5484 case Intrinsic::loongarch_lasx_xvsll_b:
5485 case Intrinsic::loongarch_lasx_xvsll_h:
5486 case Intrinsic::loongarch_lasx_xvsll_w:
5487 case Intrinsic::loongarch_lasx_xvsll_d:
5488 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5489 truncateVecElts(N, DAG));
5490 case Intrinsic::loongarch_lsx_vslli_b:
5491 case Intrinsic::loongarch_lasx_xvslli_b:
5492 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5493 lowerVectorSplatImm<3>(N, 2, DAG));
5494 case Intrinsic::loongarch_lsx_vslli_h:
5495 case Intrinsic::loongarch_lasx_xvslli_h:
5496 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5497 lowerVectorSplatImm<4>(N, 2, DAG));
5498 case Intrinsic::loongarch_lsx_vslli_w:
5499 case Intrinsic::loongarch_lasx_xvslli_w:
5500 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5501 lowerVectorSplatImm<5>(N, 2, DAG));
5502 case Intrinsic::loongarch_lsx_vslli_d:
5503 case Intrinsic::loongarch_lasx_xvslli_d:
5504 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5505 lowerVectorSplatImm<6>(N, 2, DAG));
5506 case Intrinsic::loongarch_lsx_vsrl_b:
5507 case Intrinsic::loongarch_lsx_vsrl_h:
5508 case Intrinsic::loongarch_lsx_vsrl_w:
5509 case Intrinsic::loongarch_lsx_vsrl_d:
5510 case Intrinsic::loongarch_lasx_xvsrl_b:
5511 case Intrinsic::loongarch_lasx_xvsrl_h:
5512 case Intrinsic::loongarch_lasx_xvsrl_w:
5513 case Intrinsic::loongarch_lasx_xvsrl_d:
5514 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5515 truncateVecElts(N, DAG));
5516 case Intrinsic::loongarch_lsx_vsrli_b:
5517 case Intrinsic::loongarch_lasx_xvsrli_b:
5518 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5519 lowerVectorSplatImm<3>(N, 2, DAG));
5520 case Intrinsic::loongarch_lsx_vsrli_h:
5521 case Intrinsic::loongarch_lasx_xvsrli_h:
5522 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5523 lowerVectorSplatImm<4>(N, 2, DAG));
5524 case Intrinsic::loongarch_lsx_vsrli_w:
5525 case Intrinsic::loongarch_lasx_xvsrli_w:
5526 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5527 lowerVectorSplatImm<5>(N, 2, DAG));
5528 case Intrinsic::loongarch_lsx_vsrli_d:
5529 case Intrinsic::loongarch_lasx_xvsrli_d:
5530 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5531 lowerVectorSplatImm<6>(N, 2, DAG));
5532 case Intrinsic::loongarch_lsx_vsra_b:
5533 case Intrinsic::loongarch_lsx_vsra_h:
5534 case Intrinsic::loongarch_lsx_vsra_w:
5535 case Intrinsic::loongarch_lsx_vsra_d:
5536 case Intrinsic::loongarch_lasx_xvsra_b:
5537 case Intrinsic::loongarch_lasx_xvsra_h:
5538 case Intrinsic::loongarch_lasx_xvsra_w:
5539 case Intrinsic::loongarch_lasx_xvsra_d:
5540 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5541 truncateVecElts(N, DAG));
5542 case Intrinsic::loongarch_lsx_vsrai_b:
5543 case Intrinsic::loongarch_lasx_xvsrai_b:
5544 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5545 lowerVectorSplatImm<3>(N, 2, DAG));
5546 case Intrinsic::loongarch_lsx_vsrai_h:
5547 case Intrinsic::loongarch_lasx_xvsrai_h:
5548 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5549 lowerVectorSplatImm<4>(N, 2, DAG));
5550 case Intrinsic::loongarch_lsx_vsrai_w:
5551 case Intrinsic::loongarch_lasx_xvsrai_w:
5552 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5553 lowerVectorSplatImm<5>(N, 2, DAG));
5554 case Intrinsic::loongarch_lsx_vsrai_d:
5555 case Intrinsic::loongarch_lasx_xvsrai_d:
5556 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5557 lowerVectorSplatImm<6>(N, 2, DAG));
5558 case Intrinsic::loongarch_lsx_vclz_b:
5559 case Intrinsic::loongarch_lsx_vclz_h:
5560 case Intrinsic::loongarch_lsx_vclz_w:
5561 case Intrinsic::loongarch_lsx_vclz_d:
5562 case Intrinsic::loongarch_lasx_xvclz_b:
5563 case Intrinsic::loongarch_lasx_xvclz_h:
5564 case Intrinsic::loongarch_lasx_xvclz_w:
5565 case Intrinsic::loongarch_lasx_xvclz_d:
5566 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
5567 case Intrinsic::loongarch_lsx_vpcnt_b:
5568 case Intrinsic::loongarch_lsx_vpcnt_h:
5569 case Intrinsic::loongarch_lsx_vpcnt_w:
5570 case Intrinsic::loongarch_lsx_vpcnt_d:
5571 case Intrinsic::loongarch_lasx_xvpcnt_b:
5572 case Intrinsic::loongarch_lasx_xvpcnt_h:
5573 case Intrinsic::loongarch_lasx_xvpcnt_w:
5574 case Intrinsic::loongarch_lasx_xvpcnt_d:
5575 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
5576 case Intrinsic::loongarch_lsx_vbitclr_b:
5577 case Intrinsic::loongarch_lsx_vbitclr_h:
5578 case Intrinsic::loongarch_lsx_vbitclr_w:
5579 case Intrinsic::loongarch_lsx_vbitclr_d:
5580 case Intrinsic::loongarch_lasx_xvbitclr_b:
5581 case Intrinsic::loongarch_lasx_xvbitclr_h:
5582 case Intrinsic::loongarch_lasx_xvbitclr_w:
5583 case Intrinsic::loongarch_lasx_xvbitclr_d:
5584 return lowerVectorBitClear(N, DAG);
5585 case Intrinsic::loongarch_lsx_vbitclri_b:
5586 case Intrinsic::loongarch_lasx_xvbitclri_b:
5587 return lowerVectorBitClearImm<3>(N, DAG);
5588 case Intrinsic::loongarch_lsx_vbitclri_h:
5589 case Intrinsic::loongarch_lasx_xvbitclri_h:
5590 return lowerVectorBitClearImm<4>(N, DAG);
5591 case Intrinsic::loongarch_lsx_vbitclri_w:
5592 case Intrinsic::loongarch_lasx_xvbitclri_w:
5593 return lowerVectorBitClearImm<5>(N, DAG);
5594 case Intrinsic::loongarch_lsx_vbitclri_d:
5595 case Intrinsic::loongarch_lasx_xvbitclri_d:
5596 return lowerVectorBitClearImm<6>(N, DAG);
5597 case Intrinsic::loongarch_lsx_vbitset_b:
5598 case Intrinsic::loongarch_lsx_vbitset_h:
5599 case Intrinsic::loongarch_lsx_vbitset_w:
5600 case Intrinsic::loongarch_lsx_vbitset_d:
5601 case Intrinsic::loongarch_lasx_xvbitset_b:
5602 case Intrinsic::loongarch_lasx_xvbitset_h:
5603 case Intrinsic::loongarch_lasx_xvbitset_w:
5604 case Intrinsic::loongarch_lasx_xvbitset_d: {
5605 EVT VecTy = N->getValueType(0);
5606 SDValue One = DAG.getConstant(1, DL, VecTy);
5607 return DAG.getNode(
5608 ISD::OR, DL, VecTy, N->getOperand(1),
5609 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5610 }
5611 case Intrinsic::loongarch_lsx_vbitseti_b:
5612 case Intrinsic::loongarch_lasx_xvbitseti_b:
5613 return lowerVectorBitSetImm<3>(N, DAG);
5614 case Intrinsic::loongarch_lsx_vbitseti_h:
5615 case Intrinsic::loongarch_lasx_xvbitseti_h:
5616 return lowerVectorBitSetImm<4>(N, DAG);
5617 case Intrinsic::loongarch_lsx_vbitseti_w:
5618 case Intrinsic::loongarch_lasx_xvbitseti_w:
5619 return lowerVectorBitSetImm<5>(N, DAG);
5620 case Intrinsic::loongarch_lsx_vbitseti_d:
5621 case Intrinsic::loongarch_lasx_xvbitseti_d:
5622 return lowerVectorBitSetImm<6>(N, DAG);
5623 case Intrinsic::loongarch_lsx_vbitrev_b:
5624 case Intrinsic::loongarch_lsx_vbitrev_h:
5625 case Intrinsic::loongarch_lsx_vbitrev_w:
5626 case Intrinsic::loongarch_lsx_vbitrev_d:
5627 case Intrinsic::loongarch_lasx_xvbitrev_b:
5628 case Intrinsic::loongarch_lasx_xvbitrev_h:
5629 case Intrinsic::loongarch_lasx_xvbitrev_w:
5630 case Intrinsic::loongarch_lasx_xvbitrev_d: {
5631 EVT VecTy = N->getValueType(0);
5632 SDValue One = DAG.getConstant(1, DL, VecTy);
5633 return DAG.getNode(
5634 ISD::XOR, DL, VecTy, N->getOperand(1),
5635 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5636 }
5637 case Intrinsic::loongarch_lsx_vbitrevi_b:
5638 case Intrinsic::loongarch_lasx_xvbitrevi_b:
5639 return lowerVectorBitRevImm<3>(N, DAG);
5640 case Intrinsic::loongarch_lsx_vbitrevi_h:
5641 case Intrinsic::loongarch_lasx_xvbitrevi_h:
5642 return lowerVectorBitRevImm<4>(N, DAG);
5643 case Intrinsic::loongarch_lsx_vbitrevi_w:
5644 case Intrinsic::loongarch_lasx_xvbitrevi_w:
5645 return lowerVectorBitRevImm<5>(N, DAG);
5646 case Intrinsic::loongarch_lsx_vbitrevi_d:
5647 case Intrinsic::loongarch_lasx_xvbitrevi_d:
5648 return lowerVectorBitRevImm<6>(N, DAG);
5649 case Intrinsic::loongarch_lsx_vfadd_s:
5650 case Intrinsic::loongarch_lsx_vfadd_d:
5651 case Intrinsic::loongarch_lasx_xvfadd_s:
5652 case Intrinsic::loongarch_lasx_xvfadd_d:
5653 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
5654 N->getOperand(2));
5655 case Intrinsic::loongarch_lsx_vfsub_s:
5656 case Intrinsic::loongarch_lsx_vfsub_d:
5657 case Intrinsic::loongarch_lasx_xvfsub_s:
5658 case Intrinsic::loongarch_lasx_xvfsub_d:
5659 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
5660 N->getOperand(2));
5661 case Intrinsic::loongarch_lsx_vfmul_s:
5662 case Intrinsic::loongarch_lsx_vfmul_d:
5663 case Intrinsic::loongarch_lasx_xvfmul_s:
5664 case Intrinsic::loongarch_lasx_xvfmul_d:
5665 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
5666 N->getOperand(2));
5667 case Intrinsic::loongarch_lsx_vfdiv_s:
5668 case Intrinsic::loongarch_lsx_vfdiv_d:
5669 case Intrinsic::loongarch_lasx_xvfdiv_s:
5670 case Intrinsic::loongarch_lasx_xvfdiv_d:
5671 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
5672 N->getOperand(2));
5673 case Intrinsic::loongarch_lsx_vfmadd_s:
5674 case Intrinsic::loongarch_lsx_vfmadd_d:
5675 case Intrinsic::loongarch_lasx_xvfmadd_s:
5676 case Intrinsic::loongarch_lasx_xvfmadd_d:
5677 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
5678 N->getOperand(2), N->getOperand(3));
5679 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
5680 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5681 N->getOperand(1), N->getOperand(2),
5682 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
5683 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
5684 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
5685 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5686 N->getOperand(1), N->getOperand(2),
5687 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
5688 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
5689 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
5690 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5691 N->getOperand(1), N->getOperand(2),
5692 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
5693 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
5694 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5695 N->getOperand(1), N->getOperand(2),
5696 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
5697 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
5698 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
5699 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
5700 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
5701 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
5702 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
5703 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
5704 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
5705 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
5706 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
5707 N->getOperand(1)));
5708 case Intrinsic::loongarch_lsx_vreplve_b:
5709 case Intrinsic::loongarch_lsx_vreplve_h:
5710 case Intrinsic::loongarch_lsx_vreplve_w:
5711 case Intrinsic::loongarch_lsx_vreplve_d:
5712 case Intrinsic::loongarch_lasx_xvreplve_b:
5713 case Intrinsic::loongarch_lasx_xvreplve_h:
5714 case Intrinsic::loongarch_lasx_xvreplve_w:
5715 case Intrinsic::loongarch_lasx_xvreplve_d:
5716 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
5717 N->getOperand(1),
5718 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
5719 N->getOperand(2)));
5720 }
5721 return SDValue();
5722 }
5723
performMOVGR2FR_WCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5724 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
5725 TargetLowering::DAGCombinerInfo &DCI,
5726 const LoongArchSubtarget &Subtarget) {
5727 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 the the
5728 // conversion is unnecessary and can be replaced with the
5729 // MOVFR2GR_S_LA64 operand.
5730 SDValue Op0 = N->getOperand(0);
5731 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
5732 return Op0.getOperand(0);
5733 return SDValue();
5734 }
5735
performMOVFR2GR_SCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5736 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
5737 TargetLowering::DAGCombinerInfo &DCI,
5738 const LoongArchSubtarget &Subtarget) {
5739 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
5740 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
5741 // operand.
5742 SDValue Op0 = N->getOperand(0);
5743 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
5744 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
5745 "Unexpected value type!");
5746 return Op0.getOperand(0);
5747 }
5748 return SDValue();
5749 }
5750
performVMSKLTZCombine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5751 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
5752 TargetLowering::DAGCombinerInfo &DCI,
5753 const LoongArchSubtarget &Subtarget) {
5754 MVT VT = N->getSimpleValueType(0);
5755 unsigned NumBits = VT.getScalarSizeInBits();
5756
5757 // Simplify the inputs.
5758 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5759 APInt DemandedMask(APInt::getAllOnes(NumBits));
5760 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
5761 return SDValue(N, 0);
5762
5763 return SDValue();
5764 }
5765
5766 static SDValue
performSPLIT_PAIR_F64Combine(SDNode * N,SelectionDAG & DAG,TargetLowering::DAGCombinerInfo & DCI,const LoongArchSubtarget & Subtarget)5767 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
5768 TargetLowering::DAGCombinerInfo &DCI,
5769 const LoongArchSubtarget &Subtarget) {
5770 SDValue Op0 = N->getOperand(0);
5771 SDLoc DL(N);
5772
5773 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
5774 // redundant. Instead, use BuildPairF64's operands directly.
5775 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
5776 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
5777
5778 if (Op0->isUndef()) {
5779 SDValue Lo = DAG.getUNDEF(MVT::i32);
5780 SDValue Hi = DAG.getUNDEF(MVT::i32);
5781 return DCI.CombineTo(N, Lo, Hi);
5782 }
5783
5784 // It's cheaper to materialise two 32-bit integers than to load a double
5785 // from the constant pool and transfer it to integer registers through the
5786 // stack.
5787 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
5788 APInt V = C->getValueAPF().bitcastToAPInt();
5789 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
5790 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
5791 return DCI.CombineTo(N, Lo, Hi);
5792 }
5793
5794 return SDValue();
5795 }
5796
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const5797 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
5798 DAGCombinerInfo &DCI) const {
5799 SelectionDAG &DAG = DCI.DAG;
5800 switch (N->getOpcode()) {
5801 default:
5802 break;
5803 case ISD::AND:
5804 return performANDCombine(N, DAG, DCI, Subtarget);
5805 case ISD::OR:
5806 return performORCombine(N, DAG, DCI, Subtarget);
5807 case ISD::SETCC:
5808 return performSETCCCombine(N, DAG, DCI, Subtarget);
5809 case ISD::SRL:
5810 return performSRLCombine(N, DAG, DCI, Subtarget);
5811 case ISD::BITCAST:
5812 return performBITCASTCombine(N, DAG, DCI, Subtarget);
5813 case LoongArchISD::BITREV_W:
5814 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
5815 case ISD::INTRINSIC_WO_CHAIN:
5816 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
5817 case LoongArchISD::MOVGR2FR_W_LA64:
5818 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
5819 case LoongArchISD::MOVFR2GR_S_LA64:
5820 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
5821 case LoongArchISD::VMSKLTZ:
5822 case LoongArchISD::XVMSKLTZ:
5823 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
5824 case LoongArchISD::SPLIT_PAIR_F64:
5825 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
5826 }
5827 return SDValue();
5828 }
5829
insertDivByZeroTrap(MachineInstr & MI,MachineBasicBlock * MBB)5830 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
5831 MachineBasicBlock *MBB) {
5832 if (!ZeroDivCheck)
5833 return MBB;
5834
5835 // Build instructions:
5836 // MBB:
5837 // div(or mod) $dst, $dividend, $divisor
5838 // bne $divisor, $zero, SinkMBB
5839 // BreakMBB:
5840 // break 7 // BRK_DIVZERO
5841 // SinkMBB:
5842 // fallthrough
5843 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
5844 MachineFunction::iterator It = ++MBB->getIterator();
5845 MachineFunction *MF = MBB->getParent();
5846 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5847 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5848 MF->insert(It, BreakMBB);
5849 MF->insert(It, SinkMBB);
5850
5851 // Transfer the remainder of MBB and its successor edges to SinkMBB.
5852 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
5853 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
5854
5855 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
5856 DebugLoc DL = MI.getDebugLoc();
5857 MachineOperand &Divisor = MI.getOperand(2);
5858 Register DivisorReg = Divisor.getReg();
5859
5860 // MBB:
5861 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
5862 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
5863 .addReg(LoongArch::R0)
5864 .addMBB(SinkMBB);
5865 MBB->addSuccessor(BreakMBB);
5866 MBB->addSuccessor(SinkMBB);
5867
5868 // BreakMBB:
5869 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
5870 // definition of BRK_DIVZERO.
5871 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
5872 BreakMBB->addSuccessor(SinkMBB);
5873
5874 // Clear Divisor's kill flag.
5875 Divisor.setIsKill(false);
5876
5877 return SinkMBB;
5878 }
5879
5880 static MachineBasicBlock *
emitVecCondBranchPseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)5881 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
5882 const LoongArchSubtarget &Subtarget) {
5883 unsigned CondOpc;
5884 switch (MI.getOpcode()) {
5885 default:
5886 llvm_unreachable("Unexpected opcode");
5887 case LoongArch::PseudoVBZ:
5888 CondOpc = LoongArch::VSETEQZ_V;
5889 break;
5890 case LoongArch::PseudoVBZ_B:
5891 CondOpc = LoongArch::VSETANYEQZ_B;
5892 break;
5893 case LoongArch::PseudoVBZ_H:
5894 CondOpc = LoongArch::VSETANYEQZ_H;
5895 break;
5896 case LoongArch::PseudoVBZ_W:
5897 CondOpc = LoongArch::VSETANYEQZ_W;
5898 break;
5899 case LoongArch::PseudoVBZ_D:
5900 CondOpc = LoongArch::VSETANYEQZ_D;
5901 break;
5902 case LoongArch::PseudoVBNZ:
5903 CondOpc = LoongArch::VSETNEZ_V;
5904 break;
5905 case LoongArch::PseudoVBNZ_B:
5906 CondOpc = LoongArch::VSETALLNEZ_B;
5907 break;
5908 case LoongArch::PseudoVBNZ_H:
5909 CondOpc = LoongArch::VSETALLNEZ_H;
5910 break;
5911 case LoongArch::PseudoVBNZ_W:
5912 CondOpc = LoongArch::VSETALLNEZ_W;
5913 break;
5914 case LoongArch::PseudoVBNZ_D:
5915 CondOpc = LoongArch::VSETALLNEZ_D;
5916 break;
5917 case LoongArch::PseudoXVBZ:
5918 CondOpc = LoongArch::XVSETEQZ_V;
5919 break;
5920 case LoongArch::PseudoXVBZ_B:
5921 CondOpc = LoongArch::XVSETANYEQZ_B;
5922 break;
5923 case LoongArch::PseudoXVBZ_H:
5924 CondOpc = LoongArch::XVSETANYEQZ_H;
5925 break;
5926 case LoongArch::PseudoXVBZ_W:
5927 CondOpc = LoongArch::XVSETANYEQZ_W;
5928 break;
5929 case LoongArch::PseudoXVBZ_D:
5930 CondOpc = LoongArch::XVSETANYEQZ_D;
5931 break;
5932 case LoongArch::PseudoXVBNZ:
5933 CondOpc = LoongArch::XVSETNEZ_V;
5934 break;
5935 case LoongArch::PseudoXVBNZ_B:
5936 CondOpc = LoongArch::XVSETALLNEZ_B;
5937 break;
5938 case LoongArch::PseudoXVBNZ_H:
5939 CondOpc = LoongArch::XVSETALLNEZ_H;
5940 break;
5941 case LoongArch::PseudoXVBNZ_W:
5942 CondOpc = LoongArch::XVSETALLNEZ_W;
5943 break;
5944 case LoongArch::PseudoXVBNZ_D:
5945 CondOpc = LoongArch::XVSETALLNEZ_D;
5946 break;
5947 }
5948
5949 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
5950 const BasicBlock *LLVM_BB = BB->getBasicBlock();
5951 DebugLoc DL = MI.getDebugLoc();
5952 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5953 MachineFunction::iterator It = ++BB->getIterator();
5954
5955 MachineFunction *F = BB->getParent();
5956 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
5957 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
5958 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
5959
5960 F->insert(It, FalseBB);
5961 F->insert(It, TrueBB);
5962 F->insert(It, SinkBB);
5963
5964 // Transfer the remainder of MBB and its successor edges to Sink.
5965 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
5966 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
5967
5968 // Insert the real instruction to BB.
5969 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
5970 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
5971
5972 // Insert branch.
5973 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
5974 BB->addSuccessor(FalseBB);
5975 BB->addSuccessor(TrueBB);
5976
5977 // FalseBB.
5978 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
5979 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
5980 .addReg(LoongArch::R0)
5981 .addImm(0);
5982 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
5983 FalseBB->addSuccessor(SinkBB);
5984
5985 // TrueBB.
5986 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
5987 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
5988 .addReg(LoongArch::R0)
5989 .addImm(1);
5990 TrueBB->addSuccessor(SinkBB);
5991
5992 // SinkBB: merge the results.
5993 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
5994 MI.getOperand(0).getReg())
5995 .addReg(RD1)
5996 .addMBB(FalseBB)
5997 .addReg(RD2)
5998 .addMBB(TrueBB);
5999
6000 // The pseudo instruction is gone now.
6001 MI.eraseFromParent();
6002 return SinkBB;
6003 }
6004
6005 static MachineBasicBlock *
emitPseudoXVINSGR2VR(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6006 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6007 const LoongArchSubtarget &Subtarget) {
6008 unsigned InsOp;
6009 unsigned HalfSize;
6010 switch (MI.getOpcode()) {
6011 default:
6012 llvm_unreachable("Unexpected opcode");
6013 case LoongArch::PseudoXVINSGR2VR_B:
6014 HalfSize = 16;
6015 InsOp = LoongArch::VINSGR2VR_B;
6016 break;
6017 case LoongArch::PseudoXVINSGR2VR_H:
6018 HalfSize = 8;
6019 InsOp = LoongArch::VINSGR2VR_H;
6020 break;
6021 }
6022 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6023 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6024 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6025 DebugLoc DL = MI.getDebugLoc();
6026 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6027 // XDst = vector_insert XSrc, Elt, Idx
6028 Register XDst = MI.getOperand(0).getReg();
6029 Register XSrc = MI.getOperand(1).getReg();
6030 Register Elt = MI.getOperand(2).getReg();
6031 unsigned Idx = MI.getOperand(3).getImm();
6032
6033 Register ScratchReg1 = XSrc;
6034 if (Idx >= HalfSize) {
6035 ScratchReg1 = MRI.createVirtualRegister(RC);
6036 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
6037 .addReg(XSrc)
6038 .addReg(XSrc)
6039 .addImm(1);
6040 }
6041
6042 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6043 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6044 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6045 .addReg(ScratchReg1, 0, LoongArch::sub_128);
6046 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
6047 .addReg(ScratchSubReg1)
6048 .addReg(Elt)
6049 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
6050
6051 Register ScratchReg2 = XDst;
6052 if (Idx >= HalfSize)
6053 ScratchReg2 = MRI.createVirtualRegister(RC);
6054
6055 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
6056 .addImm(0)
6057 .addReg(ScratchSubReg2)
6058 .addImm(LoongArch::sub_128);
6059
6060 if (Idx >= HalfSize)
6061 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
6062 .addReg(XSrc)
6063 .addReg(ScratchReg2)
6064 .addImm(2);
6065
6066 MI.eraseFromParent();
6067 return BB;
6068 }
6069
emitPseudoCTPOP(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6070 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
6071 MachineBasicBlock *BB,
6072 const LoongArchSubtarget &Subtarget) {
6073 assert(Subtarget.hasExtLSX());
6074 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6075 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6076 DebugLoc DL = MI.getDebugLoc();
6077 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6078 Register Dst = MI.getOperand(0).getReg();
6079 Register Src = MI.getOperand(1).getReg();
6080 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6081 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6082 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6083
6084 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6085 BuildMI(*BB, MI, DL,
6086 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6087 : LoongArch::VINSGR2VR_W),
6088 ScratchReg2)
6089 .addReg(ScratchReg1)
6090 .addReg(Src)
6091 .addImm(0);
6092 BuildMI(
6093 *BB, MI, DL,
6094 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6095 ScratchReg3)
6096 .addReg(ScratchReg2);
6097 BuildMI(*BB, MI, DL,
6098 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6099 : LoongArch::VPICKVE2GR_W),
6100 Dst)
6101 .addReg(ScratchReg3)
6102 .addImm(0);
6103
6104 MI.eraseFromParent();
6105 return BB;
6106 }
6107
6108 static MachineBasicBlock *
emitPseudoVMSKCOND(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6109 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6110 const LoongArchSubtarget &Subtarget) {
6111 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6112 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6113 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6114 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6115 Register Dst = MI.getOperand(0).getReg();
6116 Register Src = MI.getOperand(1).getReg();
6117 DebugLoc DL = MI.getDebugLoc();
6118 unsigned EleBits = 8;
6119 unsigned NotOpc = 0;
6120 unsigned MskOpc;
6121
6122 switch (MI.getOpcode()) {
6123 default:
6124 llvm_unreachable("Unexpected opcode");
6125 case LoongArch::PseudoVMSKLTZ_B:
6126 MskOpc = LoongArch::VMSKLTZ_B;
6127 break;
6128 case LoongArch::PseudoVMSKLTZ_H:
6129 MskOpc = LoongArch::VMSKLTZ_H;
6130 EleBits = 16;
6131 break;
6132 case LoongArch::PseudoVMSKLTZ_W:
6133 MskOpc = LoongArch::VMSKLTZ_W;
6134 EleBits = 32;
6135 break;
6136 case LoongArch::PseudoVMSKLTZ_D:
6137 MskOpc = LoongArch::VMSKLTZ_D;
6138 EleBits = 64;
6139 break;
6140 case LoongArch::PseudoVMSKGEZ_B:
6141 MskOpc = LoongArch::VMSKGEZ_B;
6142 break;
6143 case LoongArch::PseudoVMSKEQZ_B:
6144 MskOpc = LoongArch::VMSKNZ_B;
6145 NotOpc = LoongArch::VNOR_V;
6146 break;
6147 case LoongArch::PseudoVMSKNEZ_B:
6148 MskOpc = LoongArch::VMSKNZ_B;
6149 break;
6150 case LoongArch::PseudoXVMSKLTZ_B:
6151 MskOpc = LoongArch::XVMSKLTZ_B;
6152 RC = &LoongArch::LASX256RegClass;
6153 break;
6154 case LoongArch::PseudoXVMSKLTZ_H:
6155 MskOpc = LoongArch::XVMSKLTZ_H;
6156 RC = &LoongArch::LASX256RegClass;
6157 EleBits = 16;
6158 break;
6159 case LoongArch::PseudoXVMSKLTZ_W:
6160 MskOpc = LoongArch::XVMSKLTZ_W;
6161 RC = &LoongArch::LASX256RegClass;
6162 EleBits = 32;
6163 break;
6164 case LoongArch::PseudoXVMSKLTZ_D:
6165 MskOpc = LoongArch::XVMSKLTZ_D;
6166 RC = &LoongArch::LASX256RegClass;
6167 EleBits = 64;
6168 break;
6169 case LoongArch::PseudoXVMSKGEZ_B:
6170 MskOpc = LoongArch::XVMSKGEZ_B;
6171 RC = &LoongArch::LASX256RegClass;
6172 break;
6173 case LoongArch::PseudoXVMSKEQZ_B:
6174 MskOpc = LoongArch::XVMSKNZ_B;
6175 NotOpc = LoongArch::XVNOR_V;
6176 RC = &LoongArch::LASX256RegClass;
6177 break;
6178 case LoongArch::PseudoXVMSKNEZ_B:
6179 MskOpc = LoongArch::XVMSKNZ_B;
6180 RC = &LoongArch::LASX256RegClass;
6181 break;
6182 }
6183
6184 Register Msk = MRI.createVirtualRegister(RC);
6185 if (NotOpc) {
6186 Register Tmp = MRI.createVirtualRegister(RC);
6187 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6188 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6189 .addReg(Tmp, RegState::Kill)
6190 .addReg(Tmp, RegState::Kill);
6191 } else {
6192 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6193 }
6194
6195 if (TRI->getRegSizeInBits(*RC) > 128) {
6196 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6197 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6198 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6199 .addReg(Msk)
6200 .addImm(0);
6201 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6202 .addReg(Msk, RegState::Kill)
6203 .addImm(4);
6204 BuildMI(*BB, MI, DL,
6205 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6206 : LoongArch::BSTRINS_W),
6207 Dst)
6208 .addReg(Lo, RegState::Kill)
6209 .addReg(Hi, RegState::Kill)
6210 .addImm(256 / EleBits - 1)
6211 .addImm(128 / EleBits);
6212 } else {
6213 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6214 .addReg(Msk, RegState::Kill)
6215 .addImm(0);
6216 }
6217
6218 MI.eraseFromParent();
6219 return BB;
6220 }
6221
6222 static MachineBasicBlock *
emitSplitPairF64Pseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6223 emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6224 const LoongArchSubtarget &Subtarget) {
6225 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6226 "Unexpected instruction");
6227
6228 MachineFunction &MF = *BB->getParent();
6229 DebugLoc DL = MI.getDebugLoc();
6230 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6231 Register LoReg = MI.getOperand(0).getReg();
6232 Register HiReg = MI.getOperand(1).getReg();
6233 Register SrcReg = MI.getOperand(2).getReg();
6234
6235 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6236 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6237 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6238 MI.eraseFromParent(); // The pseudo instruction is gone now.
6239 return BB;
6240 }
6241
6242 static MachineBasicBlock *
emitBuildPairF64Pseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6243 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6244 const LoongArchSubtarget &Subtarget) {
6245 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6246 "Unexpected instruction");
6247
6248 MachineFunction &MF = *BB->getParent();
6249 DebugLoc DL = MI.getDebugLoc();
6250 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6251 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6252 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6253 Register DstReg = MI.getOperand(0).getReg();
6254 Register LoReg = MI.getOperand(1).getReg();
6255 Register HiReg = MI.getOperand(2).getReg();
6256
6257 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6258 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6259 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6260 .addReg(TmpReg, RegState::Kill)
6261 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6262 MI.eraseFromParent(); // The pseudo instruction is gone now.
6263 return BB;
6264 }
6265
isSelectPseudo(MachineInstr & MI)6266 static bool isSelectPseudo(MachineInstr &MI) {
6267 switch (MI.getOpcode()) {
6268 default:
6269 return false;
6270 case LoongArch::Select_GPR_Using_CC_GPR:
6271 return true;
6272 }
6273 }
6274
6275 static MachineBasicBlock *
emitSelectPseudo(MachineInstr & MI,MachineBasicBlock * BB,const LoongArchSubtarget & Subtarget)6276 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6277 const LoongArchSubtarget &Subtarget) {
6278 // To "insert" Select_* instructions, we actually have to insert the triangle
6279 // control-flow pattern. The incoming instructions know the destination vreg
6280 // to set, the condition code register to branch on, the true/false values to
6281 // select between, and the condcode to use to select the appropriate branch.
6282 //
6283 // We produce the following control flow:
6284 // HeadMBB
6285 // | \
6286 // | IfFalseMBB
6287 // | /
6288 // TailMBB
6289 //
6290 // When we find a sequence of selects we attempt to optimize their emission
6291 // by sharing the control flow. Currently we only handle cases where we have
6292 // multiple selects with the exact same condition (same LHS, RHS and CC).
6293 // The selects may be interleaved with other instructions if the other
6294 // instructions meet some requirements we deem safe:
6295 // - They are not pseudo instructions.
6296 // - They are debug instructions. Otherwise,
6297 // - They do not have side-effects, do not access memory and their inputs do
6298 // not depend on the results of the select pseudo-instructions.
6299 // The TrueV/FalseV operands of the selects cannot depend on the result of
6300 // previous selects in the sequence.
6301 // These conditions could be further relaxed. See the X86 target for a
6302 // related approach and more information.
6303
6304 Register LHS = MI.getOperand(1).getReg();
6305 Register RHS;
6306 if (MI.getOperand(2).isReg())
6307 RHS = MI.getOperand(2).getReg();
6308 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6309
6310 SmallVector<MachineInstr *, 4> SelectDebugValues;
6311 SmallSet<Register, 4> SelectDests;
6312 SelectDests.insert(MI.getOperand(0).getReg());
6313
6314 MachineInstr *LastSelectPseudo = &MI;
6315 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6316 SequenceMBBI != E; ++SequenceMBBI) {
6317 if (SequenceMBBI->isDebugInstr())
6318 continue;
6319 if (isSelectPseudo(*SequenceMBBI)) {
6320 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6321 !SequenceMBBI->getOperand(2).isReg() ||
6322 SequenceMBBI->getOperand(2).getReg() != RHS ||
6323 SequenceMBBI->getOperand(3).getImm() != CC ||
6324 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6325 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6326 break;
6327 LastSelectPseudo = &*SequenceMBBI;
6328 SequenceMBBI->collectDebugValues(SelectDebugValues);
6329 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6330 continue;
6331 }
6332 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6333 SequenceMBBI->mayLoadOrStore() ||
6334 SequenceMBBI->usesCustomInsertionHook())
6335 break;
6336 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6337 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6338 }))
6339 break;
6340 }
6341
6342 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6343 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6344 DebugLoc DL = MI.getDebugLoc();
6345 MachineFunction::iterator I = ++BB->getIterator();
6346
6347 MachineBasicBlock *HeadMBB = BB;
6348 MachineFunction *F = BB->getParent();
6349 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6350 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6351
6352 F->insert(I, IfFalseMBB);
6353 F->insert(I, TailMBB);
6354
6355 // Set the call frame size on entry to the new basic blocks.
6356 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6357 IfFalseMBB->setCallFrameSize(CallFrameSize);
6358 TailMBB->setCallFrameSize(CallFrameSize);
6359
6360 // Transfer debug instructions associated with the selects to TailMBB.
6361 for (MachineInstr *DebugInstr : SelectDebugValues) {
6362 TailMBB->push_back(DebugInstr->removeFromParent());
6363 }
6364
6365 // Move all instructions after the sequence to TailMBB.
6366 TailMBB->splice(TailMBB->end(), HeadMBB,
6367 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6368 // Update machine-CFG edges by transferring all successors of the current
6369 // block to the new block which will contain the Phi nodes for the selects.
6370 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6371 // Set the successors for HeadMBB.
6372 HeadMBB->addSuccessor(IfFalseMBB);
6373 HeadMBB->addSuccessor(TailMBB);
6374
6375 // Insert appropriate branch.
6376 if (MI.getOperand(2).isImm())
6377 BuildMI(HeadMBB, DL, TII.get(CC))
6378 .addReg(LHS)
6379 .addImm(MI.getOperand(2).getImm())
6380 .addMBB(TailMBB);
6381 else
6382 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6383
6384 // IfFalseMBB just falls through to TailMBB.
6385 IfFalseMBB->addSuccessor(TailMBB);
6386
6387 // Create PHIs for all of the select pseudo-instructions.
6388 auto SelectMBBI = MI.getIterator();
6389 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6390 auto InsertionPoint = TailMBB->begin();
6391 while (SelectMBBI != SelectEnd) {
6392 auto Next = std::next(SelectMBBI);
6393 if (isSelectPseudo(*SelectMBBI)) {
6394 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6395 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6396 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6397 .addReg(SelectMBBI->getOperand(4).getReg())
6398 .addMBB(HeadMBB)
6399 .addReg(SelectMBBI->getOperand(5).getReg())
6400 .addMBB(IfFalseMBB);
6401 SelectMBBI->eraseFromParent();
6402 }
6403 SelectMBBI = Next;
6404 }
6405
6406 F->getProperties().resetNoPHIs();
6407 return TailMBB;
6408 }
6409
EmitInstrWithCustomInserter(MachineInstr & MI,MachineBasicBlock * BB) const6410 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6411 MachineInstr &MI, MachineBasicBlock *BB) const {
6412 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6413 DebugLoc DL = MI.getDebugLoc();
6414
6415 switch (MI.getOpcode()) {
6416 default:
6417 llvm_unreachable("Unexpected instr type to insert");
6418 case LoongArch::DIV_W:
6419 case LoongArch::DIV_WU:
6420 case LoongArch::MOD_W:
6421 case LoongArch::MOD_WU:
6422 case LoongArch::DIV_D:
6423 case LoongArch::DIV_DU:
6424 case LoongArch::MOD_D:
6425 case LoongArch::MOD_DU:
6426 return insertDivByZeroTrap(MI, BB);
6427 break;
6428 case LoongArch::WRFCSR: {
6429 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6430 LoongArch::FCSR0 + MI.getOperand(0).getImm())
6431 .addReg(MI.getOperand(1).getReg());
6432 MI.eraseFromParent();
6433 return BB;
6434 }
6435 case LoongArch::RDFCSR: {
6436 MachineInstr *ReadFCSR =
6437 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6438 MI.getOperand(0).getReg())
6439 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6440 ReadFCSR->getOperand(1).setIsUndef();
6441 MI.eraseFromParent();
6442 return BB;
6443 }
6444 case LoongArch::Select_GPR_Using_CC_GPR:
6445 return emitSelectPseudo(MI, BB, Subtarget);
6446 case LoongArch::BuildPairF64Pseudo:
6447 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6448 case LoongArch::SplitPairF64Pseudo:
6449 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6450 case LoongArch::PseudoVBZ:
6451 case LoongArch::PseudoVBZ_B:
6452 case LoongArch::PseudoVBZ_H:
6453 case LoongArch::PseudoVBZ_W:
6454 case LoongArch::PseudoVBZ_D:
6455 case LoongArch::PseudoVBNZ:
6456 case LoongArch::PseudoVBNZ_B:
6457 case LoongArch::PseudoVBNZ_H:
6458 case LoongArch::PseudoVBNZ_W:
6459 case LoongArch::PseudoVBNZ_D:
6460 case LoongArch::PseudoXVBZ:
6461 case LoongArch::PseudoXVBZ_B:
6462 case LoongArch::PseudoXVBZ_H:
6463 case LoongArch::PseudoXVBZ_W:
6464 case LoongArch::PseudoXVBZ_D:
6465 case LoongArch::PseudoXVBNZ:
6466 case LoongArch::PseudoXVBNZ_B:
6467 case LoongArch::PseudoXVBNZ_H:
6468 case LoongArch::PseudoXVBNZ_W:
6469 case LoongArch::PseudoXVBNZ_D:
6470 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6471 case LoongArch::PseudoXVINSGR2VR_B:
6472 case LoongArch::PseudoXVINSGR2VR_H:
6473 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6474 case LoongArch::PseudoCTPOP:
6475 return emitPseudoCTPOP(MI, BB, Subtarget);
6476 case LoongArch::PseudoVMSKLTZ_B:
6477 case LoongArch::PseudoVMSKLTZ_H:
6478 case LoongArch::PseudoVMSKLTZ_W:
6479 case LoongArch::PseudoVMSKLTZ_D:
6480 case LoongArch::PseudoVMSKGEZ_B:
6481 case LoongArch::PseudoVMSKEQZ_B:
6482 case LoongArch::PseudoVMSKNEZ_B:
6483 case LoongArch::PseudoXVMSKLTZ_B:
6484 case LoongArch::PseudoXVMSKLTZ_H:
6485 case LoongArch::PseudoXVMSKLTZ_W:
6486 case LoongArch::PseudoXVMSKLTZ_D:
6487 case LoongArch::PseudoXVMSKGEZ_B:
6488 case LoongArch::PseudoXVMSKEQZ_B:
6489 case LoongArch::PseudoXVMSKNEZ_B:
6490 return emitPseudoVMSKCOND(MI, BB, Subtarget);
6491 case TargetOpcode::STATEPOINT:
6492 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
6493 // while bl call instruction (where statepoint will be lowered at the
6494 // end) has implicit def. This def is early-clobber as it will be set at
6495 // the moment of the call and earlier than any use is read.
6496 // Add this implicit dead def here as a workaround.
6497 MI.addOperand(*MI.getMF(),
6498 MachineOperand::CreateReg(
6499 LoongArch::R1, /*isDef*/ true,
6500 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
6501 /*isUndef*/ false, /*isEarlyClobber*/ true));
6502 if (!Subtarget.is64Bit())
6503 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
6504 return emitPatchPoint(MI, BB);
6505 }
6506 }
6507
allowsMisalignedMemoryAccesses(EVT VT,unsigned AddrSpace,Align Alignment,MachineMemOperand::Flags Flags,unsigned * Fast) const6508 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
6509 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
6510 unsigned *Fast) const {
6511 if (!Subtarget.hasUAL())
6512 return false;
6513
6514 // TODO: set reasonable speed number.
6515 if (Fast)
6516 *Fast = 1;
6517 return true;
6518 }
6519
getTargetNodeName(unsigned Opcode) const6520 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
6521 switch ((LoongArchISD::NodeType)Opcode) {
6522 case LoongArchISD::FIRST_NUMBER:
6523 break;
6524
6525 #define NODE_NAME_CASE(node) \
6526 case LoongArchISD::node: \
6527 return "LoongArchISD::" #node;
6528
6529 // TODO: Add more target-dependent nodes later.
6530 NODE_NAME_CASE(CALL)
6531 NODE_NAME_CASE(CALL_MEDIUM)
6532 NODE_NAME_CASE(CALL_LARGE)
6533 NODE_NAME_CASE(RET)
6534 NODE_NAME_CASE(TAIL)
6535 NODE_NAME_CASE(TAIL_MEDIUM)
6536 NODE_NAME_CASE(TAIL_LARGE)
6537 NODE_NAME_CASE(SELECT_CC)
6538 NODE_NAME_CASE(SLL_W)
6539 NODE_NAME_CASE(SRA_W)
6540 NODE_NAME_CASE(SRL_W)
6541 NODE_NAME_CASE(BSTRINS)
6542 NODE_NAME_CASE(BSTRPICK)
6543 NODE_NAME_CASE(MOVGR2FR_W_LA64)
6544 NODE_NAME_CASE(MOVFR2GR_S_LA64)
6545 NODE_NAME_CASE(FTINT)
6546 NODE_NAME_CASE(BUILD_PAIR_F64)
6547 NODE_NAME_CASE(SPLIT_PAIR_F64)
6548 NODE_NAME_CASE(REVB_2H)
6549 NODE_NAME_CASE(REVB_2W)
6550 NODE_NAME_CASE(BITREV_4B)
6551 NODE_NAME_CASE(BITREV_8B)
6552 NODE_NAME_CASE(BITREV_W)
6553 NODE_NAME_CASE(ROTR_W)
6554 NODE_NAME_CASE(ROTL_W)
6555 NODE_NAME_CASE(DIV_W)
6556 NODE_NAME_CASE(DIV_WU)
6557 NODE_NAME_CASE(MOD_W)
6558 NODE_NAME_CASE(MOD_WU)
6559 NODE_NAME_CASE(CLZ_W)
6560 NODE_NAME_CASE(CTZ_W)
6561 NODE_NAME_CASE(DBAR)
6562 NODE_NAME_CASE(IBAR)
6563 NODE_NAME_CASE(BREAK)
6564 NODE_NAME_CASE(SYSCALL)
6565 NODE_NAME_CASE(CRC_W_B_W)
6566 NODE_NAME_CASE(CRC_W_H_W)
6567 NODE_NAME_CASE(CRC_W_W_W)
6568 NODE_NAME_CASE(CRC_W_D_W)
6569 NODE_NAME_CASE(CRCC_W_B_W)
6570 NODE_NAME_CASE(CRCC_W_H_W)
6571 NODE_NAME_CASE(CRCC_W_W_W)
6572 NODE_NAME_CASE(CRCC_W_D_W)
6573 NODE_NAME_CASE(CSRRD)
6574 NODE_NAME_CASE(CSRWR)
6575 NODE_NAME_CASE(CSRXCHG)
6576 NODE_NAME_CASE(IOCSRRD_B)
6577 NODE_NAME_CASE(IOCSRRD_H)
6578 NODE_NAME_CASE(IOCSRRD_W)
6579 NODE_NAME_CASE(IOCSRRD_D)
6580 NODE_NAME_CASE(IOCSRWR_B)
6581 NODE_NAME_CASE(IOCSRWR_H)
6582 NODE_NAME_CASE(IOCSRWR_W)
6583 NODE_NAME_CASE(IOCSRWR_D)
6584 NODE_NAME_CASE(CPUCFG)
6585 NODE_NAME_CASE(MOVGR2FCSR)
6586 NODE_NAME_CASE(MOVFCSR2GR)
6587 NODE_NAME_CASE(CACOP_D)
6588 NODE_NAME_CASE(CACOP_W)
6589 NODE_NAME_CASE(VSHUF)
6590 NODE_NAME_CASE(VPICKEV)
6591 NODE_NAME_CASE(VPICKOD)
6592 NODE_NAME_CASE(VPACKEV)
6593 NODE_NAME_CASE(VPACKOD)
6594 NODE_NAME_CASE(VILVL)
6595 NODE_NAME_CASE(VILVH)
6596 NODE_NAME_CASE(VSHUF4I)
6597 NODE_NAME_CASE(VREPLVEI)
6598 NODE_NAME_CASE(VREPLGR2VR)
6599 NODE_NAME_CASE(XVPERMI)
6600 NODE_NAME_CASE(VPICK_SEXT_ELT)
6601 NODE_NAME_CASE(VPICK_ZEXT_ELT)
6602 NODE_NAME_CASE(VREPLVE)
6603 NODE_NAME_CASE(VALL_ZERO)
6604 NODE_NAME_CASE(VANY_ZERO)
6605 NODE_NAME_CASE(VALL_NONZERO)
6606 NODE_NAME_CASE(VANY_NONZERO)
6607 NODE_NAME_CASE(FRECIPE)
6608 NODE_NAME_CASE(FRSQRTE)
6609 NODE_NAME_CASE(VSLLI)
6610 NODE_NAME_CASE(VSRLI)
6611 NODE_NAME_CASE(VBSLL)
6612 NODE_NAME_CASE(VBSRL)
6613 NODE_NAME_CASE(VLDREPL)
6614 NODE_NAME_CASE(VMSKLTZ)
6615 NODE_NAME_CASE(VMSKGEZ)
6616 NODE_NAME_CASE(VMSKEQZ)
6617 NODE_NAME_CASE(VMSKNEZ)
6618 NODE_NAME_CASE(XVMSKLTZ)
6619 NODE_NAME_CASE(XVMSKGEZ)
6620 NODE_NAME_CASE(XVMSKEQZ)
6621 NODE_NAME_CASE(XVMSKNEZ)
6622 }
6623 #undef NODE_NAME_CASE
6624 return nullptr;
6625 }
6626
6627 //===----------------------------------------------------------------------===//
6628 // Calling Convention Implementation
6629 //===----------------------------------------------------------------------===//
6630
// Eight general-purpose registers a0-a7 used for passing integer arguments,
// with a0-a1 reused to return values. Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or with soft float ABI.
const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
                             LoongArch::R7, LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
// Eight floating-point registers fa0-fa7 used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// FPR32 and FPR64 alias each other.
const MCPhysReg ArgFPR64s[] = {
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};

// Eight vector registers VR0-VR7. CC_LoongArch allocates from this list for
// 128-bit vector values.
const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

// Eight vector registers XR0-XR7. CC_LoongArch allocates from this list for
// 256-bit vector values.
const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};
6655
6656 // Pass a 2*GRLen argument that has been split into two GRLen values through
6657 // registers or the stack as necessary.
CC_LoongArchAssign2GRLen(unsigned GRLen,CCState & State,CCValAssign VA1,ISD::ArgFlagsTy ArgFlags1,unsigned ValNo2,MVT ValVT2,MVT LocVT2,ISD::ArgFlagsTy ArgFlags2)6658 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
6659 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
6660 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
6661 ISD::ArgFlagsTy ArgFlags2) {
6662 unsigned GRLenInBytes = GRLen / 8;
6663 if (Register Reg = State.AllocateReg(ArgGPRs)) {
6664 // At least one half can be passed via register.
6665 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
6666 VA1.getLocVT(), CCValAssign::Full));
6667 } else {
6668 // Both halves must be passed on the stack, with proper alignment.
6669 Align StackAlign =
6670 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
6671 State.addLoc(
6672 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
6673 State.AllocateStack(GRLenInBytes, StackAlign),
6674 VA1.getLocVT(), CCValAssign::Full));
6675 State.addLoc(CCValAssign::getMem(
6676 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
6677 LocVT2, CCValAssign::Full));
6678 return false;
6679 }
6680 if (Register Reg = State.AllocateReg(ArgGPRs)) {
6681 // The second half can also be passed via register.
6682 State.addLoc(
6683 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
6684 } else {
6685 // The second half is passed via the stack, without additional alignment.
6686 State.addLoc(CCValAssign::getMem(
6687 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
6688 LocVT2, CCValAssign::Full));
6689 }
6690 return false;
6691 }
6692
6693 // Implements the LoongArch calling convention. Returns true upon failure.
CC_LoongArch(const DataLayout & DL,LoongArchABI::ABI ABI,unsigned ValNo,MVT ValVT,CCValAssign::LocInfo LocInfo,ISD::ArgFlagsTy ArgFlags,CCState & State,bool IsFixed,bool IsRet,Type * OrigTy)6694 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
6695 unsigned ValNo, MVT ValVT,
6696 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6697 CCState &State, bool IsFixed, bool IsRet,
6698 Type *OrigTy) {
6699 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
6700 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
6701 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
6702 MVT LocVT = ValVT;
6703
6704 // Any return value split into more than two values can't be returned
6705 // directly.
6706 if (IsRet && ValNo > 1)
6707 return true;
6708
6709 // If passing a variadic argument, or if no FPR is available.
6710 bool UseGPRForFloat = true;
6711
6712 switch (ABI) {
6713 default:
6714 llvm_unreachable("Unexpected ABI");
6715 break;
6716 case LoongArchABI::ABI_ILP32F:
6717 case LoongArchABI::ABI_LP64F:
6718 case LoongArchABI::ABI_ILP32D:
6719 case LoongArchABI::ABI_LP64D:
6720 UseGPRForFloat = !IsFixed;
6721 break;
6722 case LoongArchABI::ABI_ILP32S:
6723 case LoongArchABI::ABI_LP64S:
6724 break;
6725 }
6726
6727 // If this is a variadic argument, the LoongArch calling convention requires
6728 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
6729 // byte alignment. An aligned register should be used regardless of whether
6730 // the original argument was split during legalisation or not. The argument
6731 // will not be passed by registers if the original type is larger than
6732 // 2*GRLen, so the register alignment rule does not apply.
6733 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
6734 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
6735 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
6736 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
6737 // Skip 'odd' register if necessary.
6738 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
6739 State.AllocateReg(ArgGPRs);
6740 }
6741
6742 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
6743 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
6744 State.getPendingArgFlags();
6745
6746 assert(PendingLocs.size() == PendingArgFlags.size() &&
6747 "PendingLocs and PendingArgFlags out of sync");
6748
6749 // FPR32 and FPR64 alias each other.
6750 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
6751 UseGPRForFloat = true;
6752
6753 if (UseGPRForFloat && ValVT == MVT::f32) {
6754 LocVT = GRLenVT;
6755 LocInfo = CCValAssign::BCvt;
6756 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
6757 LocVT = MVT::i64;
6758 LocInfo = CCValAssign::BCvt;
6759 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
6760 // Handle passing f64 on LA32D with a soft float ABI or when floating point
6761 // registers are exhausted.
6762 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
6763 // Depending on available argument GPRS, f64 may be passed in a pair of
6764 // GPRs, split between a GPR and the stack, or passed completely on the
6765 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
6766 // cases.
6767 MCRegister Reg = State.AllocateReg(ArgGPRs);
6768 if (!Reg) {
6769 int64_t StackOffset = State.AllocateStack(8, Align(8));
6770 State.addLoc(
6771 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6772 return false;
6773 }
6774 LocVT = MVT::i32;
6775 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6776 MCRegister HiReg = State.AllocateReg(ArgGPRs);
6777 if (HiReg) {
6778 State.addLoc(
6779 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
6780 } else {
6781 int64_t StackOffset = State.AllocateStack(4, Align(4));
6782 State.addLoc(
6783 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6784 }
6785 return false;
6786 }
6787
6788 // Split arguments might be passed indirectly, so keep track of the pending
6789 // values.
6790 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
6791 LocVT = GRLenVT;
6792 LocInfo = CCValAssign::Indirect;
6793 PendingLocs.push_back(
6794 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
6795 PendingArgFlags.push_back(ArgFlags);
6796 if (!ArgFlags.isSplitEnd()) {
6797 return false;
6798 }
6799 }
6800
6801 // If the split argument only had two elements, it should be passed directly
6802 // in registers or on the stack.
6803 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
6804 PendingLocs.size() <= 2) {
6805 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
6806 // Apply the normal calling convention rules to the first half of the
6807 // split argument.
6808 CCValAssign VA = PendingLocs[0];
6809 ISD::ArgFlagsTy AF = PendingArgFlags[0];
6810 PendingLocs.clear();
6811 PendingArgFlags.clear();
6812 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
6813 ArgFlags);
6814 }
6815
6816 // Allocate to a register if possible, or else a stack slot.
6817 Register Reg;
6818 unsigned StoreSizeBytes = GRLen / 8;
6819 Align StackAlign = Align(GRLen / 8);
6820
6821 if (ValVT == MVT::f32 && !UseGPRForFloat)
6822 Reg = State.AllocateReg(ArgFPR32s);
6823 else if (ValVT == MVT::f64 && !UseGPRForFloat)
6824 Reg = State.AllocateReg(ArgFPR64s);
6825 else if (ValVT.is128BitVector())
6826 Reg = State.AllocateReg(ArgVRs);
6827 else if (ValVT.is256BitVector())
6828 Reg = State.AllocateReg(ArgXRs);
6829 else
6830 Reg = State.AllocateReg(ArgGPRs);
6831
6832 unsigned StackOffset =
6833 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
6834
6835 // If we reach this point and PendingLocs is non-empty, we must be at the
6836 // end of a split argument that must be passed indirectly.
6837 if (!PendingLocs.empty()) {
6838 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
6839 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
6840 for (auto &It : PendingLocs) {
6841 if (Reg)
6842 It.convertToReg(Reg);
6843 else
6844 It.convertToMem(StackOffset);
6845 State.addLoc(It);
6846 }
6847 PendingLocs.clear();
6848 PendingArgFlags.clear();
6849 return false;
6850 }
6851 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
6852 "Expected an GRLenVT at this stage");
6853
6854 if (Reg) {
6855 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6856 return false;
6857 }
6858
6859 // When a floating-point value is passed on the stack, no bit-cast is needed.
6860 if (ValVT.isFloatingPoint()) {
6861 LocVT = ValVT;
6862 LocInfo = CCValAssign::Full;
6863 }
6864
6865 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6866 return false;
6867 }
6868
analyzeInputArgs(MachineFunction & MF,CCState & CCInfo,const SmallVectorImpl<ISD::InputArg> & Ins,bool IsRet,LoongArchCCAssignFn Fn) const6869 void LoongArchTargetLowering::analyzeInputArgs(
6870 MachineFunction &MF, CCState &CCInfo,
6871 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
6872 LoongArchCCAssignFn Fn) const {
6873 FunctionType *FType = MF.getFunction().getFunctionType();
6874 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6875 MVT ArgVT = Ins[i].VT;
6876 Type *ArgTy = nullptr;
6877 if (IsRet)
6878 ArgTy = FType->getReturnType();
6879 else if (Ins[i].isOrigArg())
6880 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
6881 LoongArchABI::ABI ABI =
6882 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
6883 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
6884 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
6885 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
6886 << '\n');
6887 llvm_unreachable("");
6888 }
6889 }
6890 }
6891
analyzeOutputArgs(MachineFunction & MF,CCState & CCInfo,const SmallVectorImpl<ISD::OutputArg> & Outs,bool IsRet,CallLoweringInfo * CLI,LoongArchCCAssignFn Fn) const6892 void LoongArchTargetLowering::analyzeOutputArgs(
6893 MachineFunction &MF, CCState &CCInfo,
6894 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
6895 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
6896 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
6897 MVT ArgVT = Outs[i].VT;
6898 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
6899 LoongArchABI::ABI ABI =
6900 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
6901 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
6902 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
6903 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
6904 << "\n");
6905 llvm_unreachable("");
6906 }
6907 }
6908 }
6909
6910 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
6911 // values.
convertLocVTToValVT(SelectionDAG & DAG,SDValue Val,const CCValAssign & VA,const SDLoc & DL)6912 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
6913 const CCValAssign &VA, const SDLoc &DL) {
6914 switch (VA.getLocInfo()) {
6915 default:
6916 llvm_unreachable("Unexpected CCValAssign::LocInfo");
6917 case CCValAssign::Full:
6918 case CCValAssign::Indirect:
6919 break;
6920 case CCValAssign::BCvt:
6921 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
6922 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
6923 else
6924 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
6925 break;
6926 }
6927 return Val;
6928 }
6929
unpackFromRegLoc(SelectionDAG & DAG,SDValue Chain,const CCValAssign & VA,const SDLoc & DL,const ISD::InputArg & In,const LoongArchTargetLowering & TLI)6930 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
6931 const CCValAssign &VA, const SDLoc &DL,
6932 const ISD::InputArg &In,
6933 const LoongArchTargetLowering &TLI) {
6934 MachineFunction &MF = DAG.getMachineFunction();
6935 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6936 EVT LocVT = VA.getLocVT();
6937 SDValue Val;
6938 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
6939 Register VReg = RegInfo.createVirtualRegister(RC);
6940 RegInfo.addLiveIn(VA.getLocReg(), VReg);
6941 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
6942
6943 // If input is sign extended from 32 bits, note it for the OptW pass.
6944 if (In.isOrigArg()) {
6945 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
6946 if (OrigArg->getType()->isIntegerTy()) {
6947 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
6948 // An input zero extended from i31 can also be considered sign extended.
6949 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
6950 (BitWidth < 32 && In.Flags.isZExt())) {
6951 LoongArchMachineFunctionInfo *LAFI =
6952 MF.getInfo<LoongArchMachineFunctionInfo>();
6953 LAFI->addSExt32Register(VReg);
6954 }
6955 }
6956 }
6957
6958 return convertLocVTToValVT(DAG, Val, VA, DL);
6959 }
6960
6961 // The caller is responsible for loading the full value if the argument is
6962 // passed with CCValAssign::Indirect.
unpackFromMemLoc(SelectionDAG & DAG,SDValue Chain,const CCValAssign & VA,const SDLoc & DL)6963 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
6964 const CCValAssign &VA, const SDLoc &DL) {
6965 MachineFunction &MF = DAG.getMachineFunction();
6966 MachineFrameInfo &MFI = MF.getFrameInfo();
6967 EVT ValVT = VA.getValVT();
6968 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
6969 /*IsImmutable=*/true);
6970 SDValue FIN = DAG.getFrameIndex(
6971 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
6972
6973 ISD::LoadExtType ExtType;
6974 switch (VA.getLocInfo()) {
6975 default:
6976 llvm_unreachable("Unexpected CCValAssign::LocInfo");
6977 case CCValAssign::Full:
6978 case CCValAssign::Indirect:
6979 case CCValAssign::BCvt:
6980 ExtType = ISD::NON_EXTLOAD;
6981 break;
6982 }
6983 return DAG.getExtLoad(
6984 ExtType, DL, VA.getLocVT(), Chain, FIN,
6985 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
6986 }
6987
unpackF64OnLA32DSoftABI(SelectionDAG & DAG,SDValue Chain,const CCValAssign & VA,const CCValAssign & HiVA,const SDLoc & DL)6988 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
6989 const CCValAssign &VA,
6990 const CCValAssign &HiVA,
6991 const SDLoc &DL) {
6992 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
6993 "Unexpected VA");
6994 MachineFunction &MF = DAG.getMachineFunction();
6995 MachineFrameInfo &MFI = MF.getFrameInfo();
6996 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6997
6998 assert(VA.isRegLoc() && "Expected register VA assignment");
6999
7000 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7001 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7002 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7003 SDValue Hi;
7004 if (HiVA.isMemLoc()) {
7005 // Second half of f64 is passed on the stack.
7006 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7007 /*IsImmutable=*/true);
7008 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7009 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7010 MachinePointerInfo::getFixedStack(MF, FI));
7011 } else {
7012 // Second half of f64 is passed in another GPR.
7013 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7014 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7015 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7016 }
7017 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7018 }
7019
convertValVTToLocVT(SelectionDAG & DAG,SDValue Val,const CCValAssign & VA,const SDLoc & DL)7020 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7021 const CCValAssign &VA, const SDLoc &DL) {
7022 EVT LocVT = VA.getLocVT();
7023
7024 switch (VA.getLocInfo()) {
7025 default:
7026 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7027 case CCValAssign::Full:
7028 break;
7029 case CCValAssign::BCvt:
7030 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7031 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7032 else
7033 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7034 break;
7035 }
7036 return Val;
7037 }
7038
CC_LoongArch_GHC(unsigned ValNo,MVT ValVT,MVT LocVT,CCValAssign::LocInfo LocInfo,ISD::ArgFlagsTy ArgFlags,CCState & State)7039 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7040 CCValAssign::LocInfo LocInfo,
7041 ISD::ArgFlagsTy ArgFlags, CCState &State) {
7042 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7043 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7044 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7045 static const MCPhysReg GPRList[] = {
7046 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7047 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7048 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7049 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7050 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7051 return false;
7052 }
7053 }
7054
7055 if (LocVT == MVT::f32) {
7056 // Pass in STG registers: F1, F2, F3, F4
7057 // fs0,fs1,fs2,fs3
7058 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7059 LoongArch::F26, LoongArch::F27};
7060 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7061 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7062 return false;
7063 }
7064 }
7065
7066 if (LocVT == MVT::f64) {
7067 // Pass in STG registers: D1, D2, D3, D4
7068 // fs4,fs5,fs6,fs7
7069 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7070 LoongArch::F30_64, LoongArch::F31_64};
7071 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7072 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7073 return false;
7074 }
7075 }
7076
7077 report_fatal_error("No registers left in GHC calling convention");
7078 return true;
7079 }
7080
// Transform physical registers into virtual registers.
//
// Lowers the incoming formal arguments described by Ins. A location is
// assigned to every argument, one SDValue per entry of Ins is appended to
// InVals, and for variadic functions the unallocated argument GPRs are stored
// to a save area. Returns the resulting chain: the incoming Chain, or a
// TokenFactor joining it with the vararg stores when there are any.
//
// Only the C, Fast and GHC calling conventions are accepted; GHC additionally
// requires the F and D features and is reported as a fatal error otherwise.
SDValue LoongArchTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
      report_fatal_error(
          "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);

  // i walks ArgLocs while InsIdx walks Ins. The two indices drift apart
  // because an f64 split on LA32 occupies two ArgLocs entries for one Ins
  // entry.
  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on LA32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      // The next ArgLocs entry holds the high half; consume it here.
      ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(ArgPartOffset == 0);
      // Consume every following part that belongs to the same original
      // argument, loading each from the base address plus its part offset.
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    // Index of the first argument GPR not used by a named argument.
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      // One GRLen-sized slot per unallocated register, placed at a negative
      // offset.
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR value attached to the store's memory operand.
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
7217
mayBeEmittedAsTailCall(const CallInst * CI) const7218 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7219 return CI->isTailCall();
7220 }
7221
7222 // Check if the return value is used as only a return value, as otherwise
7223 // we can't perform a tail-call.
isUsedByReturnOnly(SDNode * N,SDValue & Chain) const7224 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7225 SDValue &Chain) const {
7226 if (N->getNumValues() != 1)
7227 return false;
7228 if (!N->hasNUsesOfValue(1, 0))
7229 return false;
7230
7231 SDNode *Copy = *N->user_begin();
7232 if (Copy->getOpcode() != ISD::CopyToReg)
7233 return false;
7234
7235 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7236 // isn't safe to perform a tail call.
7237 if (Copy->getGluedNode())
7238 return false;
7239
7240 // The copy must be used by a LoongArchISD::RET, and nothing else.
7241 bool HasRet = false;
7242 for (SDNode *Node : Copy->users()) {
7243 if (Node->getOpcode() != LoongArchISD::RET)
7244 return false;
7245 HasRet = true;
7246 }
7247
7248 if (!HasRet)
7249 return false;
7250
7251 Chain = Copy->getOperand(0);
7252 return true;
7253 }
7254
7255 // Check whether the call is eligible for tail call optimization.
isEligibleForTailCallOptimization(CCState & CCInfo,CallLoweringInfo & CLI,MachineFunction & MF,const SmallVectorImpl<CCValAssign> & ArgLocs) const7256 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7257 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7258 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7259
7260 auto CalleeCC = CLI.CallConv;
7261 auto &Outs = CLI.Outs;
7262 auto &Caller = MF.getFunction();
7263 auto CallerCC = Caller.getCallingConv();
7264
7265 // Do not tail call opt if the stack is used to pass parameters.
7266 if (CCInfo.getStackSize() != 0)
7267 return false;
7268
7269 // Do not tail call opt if any parameters need to be passed indirectly.
7270 for (auto &VA : ArgLocs)
7271 if (VA.getLocInfo() == CCValAssign::Indirect)
7272 return false;
7273
7274 // Do not tail call opt if either caller or callee uses struct return
7275 // semantics.
7276 auto IsCallerStructRet = Caller.hasStructRetAttr();
7277 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7278 if (IsCallerStructRet || IsCalleeStructRet)
7279 return false;
7280
7281 // Do not tail call opt if either the callee or caller has a byval argument.
7282 for (auto &Arg : Outs)
7283 if (Arg.Flags.isByVal())
7284 return false;
7285
7286 // The callee has to preserve all registers the caller needs to preserve.
7287 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7288 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7289 if (CalleeCC != CallerCC) {
7290 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7291 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7292 return false;
7293 }
7294 return true;
7295 }
7296
getPrefTypeAlign(EVT VT,SelectionDAG & DAG)7297 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7298 return DAG.getDataLayout().getPrefTypeAlign(
7299 VT.getTypeForEVT(*DAG.getContext()));
7300 }
7301
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
//
// Steps performed, in order:
//   1. Assign a location to every outgoing operand (CC_LoongArch_GHC for the
//      GHC calling convention, CC_LoongArch otherwise).
//   2. If a tail call was requested, check that it is really possible. A call
//      site marked musttail that cannot be tail-called is a fatal error.
//   3. Copy every byval argument into a freshly created stack object.
//   4. Place each argument in its register or outgoing stack slot.
//   5. Emit the call node chosen by the code model. For a tail call that node
//      is returned directly; otherwise the call sequence is closed and the
//      returned values are appended to InVals.
//
// CLI.IsTailCall is updated in place with the outcome of step 2. The return
// value is the resulting chain (or the tail-call node).
SDValue
LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                   SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  // Reference, so the caller observes whether the tail call was kept.
  bool &IsTailCall = CLI.IsTailCall;

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args.
  // ByValArgs is filled in Outs order; the argument loop below consumes it in
  // the same order through the index j.
  SmallVector<SDValue> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);

    // The memcpy is threaded onto the chain, so it precedes the call.
    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  // A tail call gets no CALLSEQ_START/CALLSEQ_END bracket.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  // i indexes ArgLocs while OutIdx indexes Outs/OutVals. They advance together
  // except for a split f64, which consumes two ArgLocs entries for one Outs
  // entry. j indexes ByValArgs.
  SmallVector<std::pair<Register, SDValue>> RegsToPass;
  SmallVector<SDValue> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on LA32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      // Split the f64 into two i32 halves; the low half goes in VA's register.
      SDValue SplitF64 =
          DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
                      DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        // The stack pointer (R3) is read at most once and then reused.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(DAG.getStore(
            Chain, DL, Hi, Address,
            MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split and passed by reference, we need to
      // store the required parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      // Each entry pairs a part's value with its byte offset in the slot.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();

        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      // One temporary holds the first part followed by all collected parts.
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      // From here on the argument is the address of the spill slot.
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  // MO_CALL is used when the symbol is assumed DSO-local, MO_CALL_PLT
  // otherwise.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = S->getGlobal();
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  // The node opcode depends on the code model and on whether this is a tail
  // call; any code model other than small, medium or large is a fatal error.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  unsigned Op;
  switch (DAG.getTarget().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model");
  case CodeModel::Small:
    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
    break;
  case CodeModel::Medium:
    assert(Subtarget.is64Bit() && "Medium code model requires LA64");
    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
    break;
  case CodeModel::Large:
    assert(Subtarget.is64Bit() && "Large code model requires LA64");
    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
    break;
  }

  // A tail call ends here: no CALLSEQ_END and no result copies are emitted.
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(Op, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // An f64 returned as two i32 registers: read the second register (the
      // next RVLocs entry) and rebuild the f64 from the pair.
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
                             RetValue, RetValue2);
    } else
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}
7603
CanLowerReturn(CallingConv::ID CallConv,MachineFunction & MF,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,LLVMContext & Context,const Type * RetTy) const7604 bool LoongArchTargetLowering::CanLowerReturn(
7605 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
7606 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
7607 const Type *RetTy) const {
7608 SmallVector<CCValAssign> RVLocs;
7609 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
7610
7611 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7612 LoongArchABI::ABI ABI =
7613 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7614 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
7615 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
7616 nullptr))
7617 return false;
7618 }
7619 return true;
7620 }
7621
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & DL,SelectionDAG & DAG) const7622 SDValue LoongArchTargetLowering::LowerReturn(
7623 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7624 const SmallVectorImpl<ISD::OutputArg> &Outs,
7625 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
7626 SelectionDAG &DAG) const {
7627 // Stores the assignment of the return value to a location.
7628 SmallVector<CCValAssign> RVLocs;
7629
7630 // Info about the registers and stack slot.
7631 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
7632 *DAG.getContext());
7633
7634 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
7635 nullptr, CC_LoongArch);
7636 if (CallConv == CallingConv::GHC && !RVLocs.empty())
7637 report_fatal_error("GHC functions return void only");
7638 SDValue Glue;
7639 SmallVector<SDValue, 4> RetOps(1, Chain);
7640
7641 // Copy the result values into the output registers.
7642 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
7643 SDValue Val = OutVals[OutIdx];
7644 CCValAssign &VA = RVLocs[i];
7645 assert(VA.isRegLoc() && "Can only return in registers!");
7646
7647 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7648 // Handle returning f64 on LA32D with a soft float ABI.
7649 assert(VA.isRegLoc() && "Expected return via registers");
7650 assert(VA.needsCustom());
7651 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
7652 DAG.getVTList(MVT::i32, MVT::i32), Val);
7653 SDValue Lo = SplitF64.getValue(0);
7654 SDValue Hi = SplitF64.getValue(1);
7655 Register RegLo = VA.getLocReg();
7656 Register RegHi = RVLocs[++i].getLocReg();
7657
7658 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
7659 Glue = Chain.getValue(1);
7660 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
7661 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
7662 Glue = Chain.getValue(1);
7663 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
7664 } else {
7665 // Handle a 'normal' return.
7666 Val = convertValVTToLocVT(DAG, Val, VA, DL);
7667 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
7668
7669 // Guarantee that all emitted copies are stuck together.
7670 Glue = Chain.getValue(1);
7671 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7672 }
7673 }
7674
7675 RetOps[0] = Chain; // Update chain.
7676
7677 // Add the glue node if we have it.
7678 if (Glue.getNode())
7679 RetOps.push_back(Glue);
7680
7681 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
7682 }
7683
isFPImmVLDILegal(const APFloat & Imm,EVT VT) const7684 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
7685 EVT VT) const {
7686 if (!Subtarget.hasExtLSX())
7687 return false;
7688
7689 if (VT == MVT::f32) {
7690 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
7691 return (masked == 0x3e000000 || masked == 0x40000000);
7692 }
7693
7694 if (VT == MVT::f64) {
7695 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
7696 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
7697 }
7698
7699 return false;
7700 }
7701
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const7702 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7703 bool ForCodeSize) const {
7704 // TODO: Maybe need more checks here after vector extension is supported.
7705 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7706 return false;
7707 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7708 return false;
7709 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
7710 }
7711
isCheapToSpeculateCttz(Type *) const7712 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
7713 return true;
7714 }
7715
isCheapToSpeculateCtlz(Type *) const7716 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
7717 return true;
7718 }
7719
shouldInsertFencesForAtomic(const Instruction * I) const7720 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
7721 const Instruction *I) const {
7722 if (!Subtarget.is64Bit())
7723 return isa<LoadInst>(I) || isa<StoreInst>(I);
7724
7725 if (isa<LoadInst>(I))
7726 return true;
7727
7728 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
7729 // require fences beacuse we can use amswap_db.[w/d].
7730 Type *Ty = I->getOperand(0)->getType();
7731 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
7732 unsigned Size = Ty->getIntegerBitWidth();
7733 return (Size == 8 || Size == 16);
7734 }
7735
7736 return false;
7737 }
7738
getSetCCResultType(const DataLayout & DL,LLVMContext & Context,EVT VT) const7739 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
7740 LLVMContext &Context,
7741 EVT VT) const {
7742 if (!VT.isVector())
7743 return getPointerTy(DL);
7744 return VT.changeVectorElementTypeToInteger();
7745 }
7746
hasAndNot(SDValue Y) const7747 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
7748 // TODO: Support vectors.
7749 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
7750 }
7751
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const7752 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
7753 const CallInst &I,
7754 MachineFunction &MF,
7755 unsigned Intrinsic) const {
7756 switch (Intrinsic) {
7757 default:
7758 return false;
7759 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
7760 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
7761 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
7762 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
7763 Info.opc = ISD::INTRINSIC_W_CHAIN;
7764 Info.memVT = MVT::i32;
7765 Info.ptrVal = I.getArgOperand(0);
7766 Info.offset = 0;
7767 Info.align = Align(4);
7768 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
7769 MachineMemOperand::MOVolatile;
7770 return true;
7771 // TODO: Add more Intrinsics later.
7772 }
7773 }
7774
7775 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
7776 // atomicrmw and/or/xor operations with operands less than 32 bits cannot be
7777 // expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
7778 // regression, we need to implement it manually.
emitExpandAtomicRMW(AtomicRMWInst * AI) const7779 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
7780 AtomicRMWInst::BinOp Op = AI->getOperation();
7781
7782 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
7783 Op == AtomicRMWInst::And) &&
7784 "Unable to expand");
7785 unsigned MinWordSize = 4;
7786
7787 IRBuilder<> Builder(AI);
7788 LLVMContext &Ctx = Builder.getContext();
7789 const DataLayout &DL = AI->getDataLayout();
7790 Type *ValueType = AI->getType();
7791 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
7792
7793 Value *Addr = AI->getPointerOperand();
7794 PointerType *PtrTy = cast<PointerType>(Addr->getType());
7795 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
7796
7797 Value *AlignedAddr = Builder.CreateIntrinsic(
7798 Intrinsic::ptrmask, {PtrTy, IntTy},
7799 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
7800 "AlignedAddr");
7801
7802 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
7803 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
7804 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
7805 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
7806 Value *Mask = Builder.CreateShl(
7807 ConstantInt::get(WordType,
7808 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
7809 ShiftAmt, "Mask");
7810 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
7811 Value *ValOperand_Shifted =
7812 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
7813 ShiftAmt, "ValOperand_Shifted");
7814 Value *NewOperand;
7815 if (Op == AtomicRMWInst::And)
7816 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
7817 else
7818 NewOperand = ValOperand_Shifted;
7819
7820 AtomicRMWInst *NewAI =
7821 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
7822 AI->getOrdering(), AI->getSyncScopeID());
7823
7824 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
7825 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
7826 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
7827 AI->replaceAllUsesWith(FinalOldResult);
7828 AI->eraseFromParent();
7829 }
7830
7831 TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * AI) const7832 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
7833 // TODO: Add more AtomicRMWInst that needs to be extended.
7834
7835 // Since floating-point operation requires a non-trivial set of data
7836 // operations, use CmpXChg to expand.
7837 if (AI->isFloatingPointOperation() ||
7838 AI->getOperation() == AtomicRMWInst::UIncWrap ||
7839 AI->getOperation() == AtomicRMWInst::UDecWrap ||
7840 AI->getOperation() == AtomicRMWInst::USubCond ||
7841 AI->getOperation() == AtomicRMWInst::USubSat)
7842 return AtomicExpansionKind::CmpXChg;
7843
7844 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
7845 (AI->getOperation() == AtomicRMWInst::Xchg ||
7846 AI->getOperation() == AtomicRMWInst::Add ||
7847 AI->getOperation() == AtomicRMWInst::Sub)) {
7848 return AtomicExpansionKind::None;
7849 }
7850
7851 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
7852 if (Subtarget.hasLAMCAS()) {
7853 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
7854 AI->getOperation() == AtomicRMWInst::Or ||
7855 AI->getOperation() == AtomicRMWInst::Xor))
7856 return AtomicExpansionKind::Expand;
7857 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
7858 return AtomicExpansionKind::CmpXChg;
7859 }
7860
7861 if (Size == 8 || Size == 16)
7862 return AtomicExpansionKind::MaskedIntrinsic;
7863 return AtomicExpansionKind::None;
7864 }
7865
7866 static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,AtomicRMWInst::BinOp BinOp)7867 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
7868 AtomicRMWInst::BinOp BinOp) {
7869 if (GRLen == 64) {
7870 switch (BinOp) {
7871 default:
7872 llvm_unreachable("Unexpected AtomicRMW BinOp");
7873 case AtomicRMWInst::Xchg:
7874 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
7875 case AtomicRMWInst::Add:
7876 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
7877 case AtomicRMWInst::Sub:
7878 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
7879 case AtomicRMWInst::Nand:
7880 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
7881 case AtomicRMWInst::UMax:
7882 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
7883 case AtomicRMWInst::UMin:
7884 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
7885 case AtomicRMWInst::Max:
7886 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
7887 case AtomicRMWInst::Min:
7888 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
7889 // TODO: support other AtomicRMWInst.
7890 }
7891 }
7892
7893 if (GRLen == 32) {
7894 switch (BinOp) {
7895 default:
7896 llvm_unreachable("Unexpected AtomicRMW BinOp");
7897 case AtomicRMWInst::Xchg:
7898 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
7899 case AtomicRMWInst::Add:
7900 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
7901 case AtomicRMWInst::Sub:
7902 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
7903 case AtomicRMWInst::Nand:
7904 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
7905 case AtomicRMWInst::UMax:
7906 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
7907 case AtomicRMWInst::UMin:
7908 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
7909 case AtomicRMWInst::Max:
7910 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
7911 case AtomicRMWInst::Min:
7912 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
7913 // TODO: support other AtomicRMWInst.
7914 }
7915 }
7916
7917 llvm_unreachable("Unexpected GRLen\n");
7918 }
7919
7920 TargetLowering::AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst * CI) const7921 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
7922 AtomicCmpXchgInst *CI) const {
7923
7924 if (Subtarget.hasLAMCAS())
7925 return AtomicExpansionKind::None;
7926
7927 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
7928 if (Size == 8 || Size == 16)
7929 return AtomicExpansionKind::MaskedIntrinsic;
7930 return AtomicExpansionKind::None;
7931 }
7932
emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase & Builder,AtomicCmpXchgInst * CI,Value * AlignedAddr,Value * CmpVal,Value * NewVal,Value * Mask,AtomicOrdering Ord) const7933 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
7934 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
7935 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
7936 unsigned GRLen = Subtarget.getGRLen();
7937 AtomicOrdering FailOrd = CI->getFailureOrdering();
7938 Value *FailureOrdering =
7939 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
7940 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
7941 if (GRLen == 64) {
7942 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
7943 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
7944 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
7945 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
7946 }
7947 Type *Tys[] = {AlignedAddr->getType()};
7948 Value *Result = Builder.CreateIntrinsic(
7949 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
7950 if (GRLen == 64)
7951 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
7952 return Result;
7953 }
7954
emitMaskedAtomicRMWIntrinsic(IRBuilderBase & Builder,AtomicRMWInst * AI,Value * AlignedAddr,Value * Incr,Value * Mask,Value * ShiftAmt,AtomicOrdering Ord) const7955 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
7956 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
7957 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
7958 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
7959 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
7960 // mask, as this produces better code than the LL/SC loop emitted by
7961 // int_loongarch_masked_atomicrmw_xchg.
7962 if (AI->getOperation() == AtomicRMWInst::Xchg &&
7963 isa<ConstantInt>(AI->getValOperand())) {
7964 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
7965 if (CVal->isZero())
7966 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
7967 Builder.CreateNot(Mask, "Inv_Mask"),
7968 AI->getAlign(), Ord);
7969 if (CVal->isMinusOne())
7970 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
7971 AI->getAlign(), Ord);
7972 }
7973
7974 unsigned GRLen = Subtarget.getGRLen();
7975 Value *Ordering =
7976 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
7977 Type *Tys[] = {AlignedAddr->getType()};
7978 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
7979 AI->getModule(),
7980 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
7981
7982 if (GRLen == 64) {
7983 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
7984 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
7985 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
7986 }
7987
7988 Value *Result;
7989
7990 // Must pass the shift amount needed to sign extend the loaded value prior
7991 // to performing a signed comparison for min/max. ShiftAmt is the number of
7992 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
7993 // is the number of bits to left+right shift the value in order to
7994 // sign-extend.
7995 if (AI->getOperation() == AtomicRMWInst::Min ||
7996 AI->getOperation() == AtomicRMWInst::Max) {
7997 const DataLayout &DL = AI->getDataLayout();
7998 unsigned ValWidth =
7999 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8000 Value *SextShamt =
8001 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8002 Result = Builder.CreateCall(LlwOpScwLoop,
8003 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8004 } else {
8005 Result =
8006 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8007 }
8008
8009 if (GRLen == 64)
8010 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8011 return Result;
8012 }
8013
isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT VT) const8014 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8015 const MachineFunction &MF, EVT VT) const {
8016 VT = VT.getScalarType();
8017
8018 if (!VT.isSimple())
8019 return false;
8020
8021 switch (VT.getSimpleVT().SimpleTy) {
8022 case MVT::f32:
8023 case MVT::f64:
8024 return true;
8025 default:
8026 break;
8027 }
8028
8029 return false;
8030 }
8031
getExceptionPointerRegister(const Constant * PersonalityFn) const8032 Register LoongArchTargetLowering::getExceptionPointerRegister(
8033 const Constant *PersonalityFn) const {
8034 return LoongArch::R4;
8035 }
8036
getExceptionSelectorRegister(const Constant * PersonalityFn) const8037 Register LoongArchTargetLowering::getExceptionSelectorRegister(
8038 const Constant *PersonalityFn) const {
8039 return LoongArch::R5;
8040 }
8041
8042 //===----------------------------------------------------------------------===//
8043 // Target Optimization Hooks
8044 //===----------------------------------------------------------------------===//
8045
getEstimateRefinementSteps(EVT VT,const LoongArchSubtarget & Subtarget)8046 static int getEstimateRefinementSteps(EVT VT,
8047 const LoongArchSubtarget &Subtarget) {
8048 // Feature FRECIPE instrucions relative accuracy is 2^-14.
8049 // IEEE float has 23 digits and double has 52 digits.
8050 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8051 return RefinementSteps;
8052 }
8053
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps,bool & UseOneConstNR,bool Reciprocal) const8054 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8055 SelectionDAG &DAG, int Enabled,
8056 int &RefinementSteps,
8057 bool &UseOneConstNR,
8058 bool Reciprocal) const {
8059 if (Subtarget.hasFrecipe()) {
8060 SDLoc DL(Operand);
8061 EVT VT = Operand.getValueType();
8062
8063 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8064 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8065 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8066 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8067 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8068
8069 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8070 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8071
8072 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8073 if (Reciprocal)
8074 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8075
8076 return Estimate;
8077 }
8078 }
8079
8080 return SDValue();
8081 }
8082
getRecipEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps) const8083 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8084 SelectionDAG &DAG,
8085 int Enabled,
8086 int &RefinementSteps) const {
8087 if (Subtarget.hasFrecipe()) {
8088 SDLoc DL(Operand);
8089 EVT VT = Operand.getValueType();
8090
8091 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8092 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8093 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8094 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8095 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8096
8097 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8098 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8099
8100 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8101 }
8102 }
8103
8104 return SDValue();
8105 }
8106
8107 //===----------------------------------------------------------------------===//
8108 // LoongArch Inline Assembly Support
8109 //===----------------------------------------------------------------------===//
8110
8111 LoongArchTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const8112 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8113 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8114 //
8115 // 'f': A floating-point register (if available).
8116 // 'k': A memory operand whose address is formed by a base register and
8117 // (optionally scaled) index register.
8118 // 'l': A signed 16-bit constant.
8119 // 'm': A memory operand whose address is formed by a base register and
8120 // offset that is suitable for use in instructions with the same
8121 // addressing mode as st.w and ld.w.
8122 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8123 // instruction)
8124 // 'I': A signed 12-bit constant (for arithmetic instructions).
8125 // 'J': Integer zero.
8126 // 'K': An unsigned 12-bit constant (for logic instructions).
8127 // "ZB": An address that is held in a general-purpose register. The offset is
8128 // zero.
8129 // "ZC": A memory operand whose address is formed by a base register and
8130 // offset that is suitable for use in instructions with the same
8131 // addressing mode as ll.w and sc.w.
8132 if (Constraint.size() == 1) {
8133 switch (Constraint[0]) {
8134 default:
8135 break;
8136 case 'f':
8137 case 'q':
8138 return C_RegisterClass;
8139 case 'l':
8140 case 'I':
8141 case 'J':
8142 case 'K':
8143 return C_Immediate;
8144 case 'k':
8145 return C_Memory;
8146 }
8147 }
8148
8149 if (Constraint == "ZC" || Constraint == "ZB")
8150 return C_Memory;
8151
8152 // 'm' is handled here.
8153 return TargetLowering::getConstraintType(Constraint);
8154 }
8155
getInlineAsmMemConstraint(StringRef ConstraintCode) const8156 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8157 StringRef ConstraintCode) const {
8158 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8159 .Case("k", InlineAsm::ConstraintCode::k)
8160 .Case("ZB", InlineAsm::ConstraintCode::ZB)
8161 .Case("ZC", InlineAsm::ConstraintCode::ZC)
8162 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8163 }
8164
8165 std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const8166 LoongArchTargetLowering::getRegForInlineAsmConstraint(
8167 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8168 // First, see if this is a constraint that directly corresponds to a LoongArch
8169 // register class.
8170 if (Constraint.size() == 1) {
8171 switch (Constraint[0]) {
8172 case 'r':
8173 // TODO: Support fixed vectors up to GRLen?
8174 if (VT.isVector())
8175 break;
8176 return std::make_pair(0U, &LoongArch::GPRRegClass);
8177 case 'q':
8178 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8179 case 'f':
8180 if (Subtarget.hasBasicF() && VT == MVT::f32)
8181 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8182 if (Subtarget.hasBasicD() && VT == MVT::f64)
8183 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8184 if (Subtarget.hasExtLSX() &&
8185 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8186 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8187 if (Subtarget.hasExtLASX() &&
8188 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8189 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8190 break;
8191 default:
8192 break;
8193 }
8194 }
8195
8196 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8197 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8198 // constraints while the official register name is prefixed with a '$'. So we
8199 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8200 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
8201 // case insensitive, so no need to convert the constraint to upper case here.
8202 //
8203 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8204 // decode the usage of register name aliases into their official names. And
8205 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8206 // official register names.
8207 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8208 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8209 bool IsFP = Constraint[2] == 'f';
8210 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8211 std::pair<unsigned, const TargetRegisterClass *> R;
8212 R = TargetLowering::getRegForInlineAsmConstraint(
8213 TRI, join_items("", Temp.first, Temp.second), VT);
8214 // Match those names to the widest floating point register type available.
8215 if (IsFP) {
8216 unsigned RegNo = R.first;
8217 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8218 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8219 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8220 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8221 }
8222 }
8223 }
8224 return R;
8225 }
8226
8227 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8228 }
8229
LowerAsmOperandForConstraint(SDValue Op,StringRef Constraint,std::vector<SDValue> & Ops,SelectionDAG & DAG) const8230 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8231 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8232 SelectionDAG &DAG) const {
8233 // Currently only support length 1 constraints.
8234 if (Constraint.size() == 1) {
8235 switch (Constraint[0]) {
8236 case 'l':
8237 // Validate & create a 16-bit signed immediate operand.
8238 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8239 uint64_t CVal = C->getSExtValue();
8240 if (isInt<16>(CVal))
8241 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8242 Subtarget.getGRLenVT()));
8243 }
8244 return;
8245 case 'I':
8246 // Validate & create a 12-bit signed immediate operand.
8247 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8248 uint64_t CVal = C->getSExtValue();
8249 if (isInt<12>(CVal))
8250 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8251 Subtarget.getGRLenVT()));
8252 }
8253 return;
8254 case 'J':
8255 // Validate & create an integer zero operand.
8256 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8257 if (C->getZExtValue() == 0)
8258 Ops.push_back(
8259 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8260 return;
8261 case 'K':
8262 // Validate & create a 12-bit unsigned immediate operand.
8263 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8264 uint64_t CVal = C->getZExtValue();
8265 if (isUInt<12>(CVal))
8266 Ops.push_back(
8267 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8268 }
8269 return;
8270 default:
8271 break;
8272 }
8273 }
8274 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8275 }
8276
8277 #define GET_REGISTER_MATCHER
8278 #include "LoongArchGenAsmMatcher.inc"
8279
8280 Register
getRegisterByName(const char * RegName,LLT VT,const MachineFunction & MF) const8281 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8282 const MachineFunction &MF) const {
8283 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8284 std::string NewRegName = Name.second.str();
8285 Register Reg = MatchRegisterAltName(NewRegName);
8286 if (!Reg)
8287 Reg = MatchRegisterName(NewRegName);
8288 if (!Reg)
8289 return Reg;
8290 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8291 if (!ReservedRegs.test(Reg))
8292 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8293 StringRef(RegName) + "\"."));
8294 return Reg;
8295 }
8296
decomposeMulByConstant(LLVMContext & Context,EVT VT,SDValue C) const8297 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8298 EVT VT, SDValue C) const {
8299 // TODO: Support vectors.
8300 if (!VT.isScalarInteger())
8301 return false;
8302
8303 // Omit the optimization if the data size exceeds GRLen.
8304 if (VT.getSizeInBits() > Subtarget.getGRLen())
8305 return false;
8306
8307 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8308 const APInt &Imm = ConstNode->getAPIntValue();
8309 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8310 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8311 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8312 return true;
8313 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8314 if (ConstNode->hasOneUse() &&
8315 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8316 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8317 return true;
8318 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8319 // in which the immediate has two set bits. Or Break (MUL x, imm)
8320 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8321 // equals to (1 << s0) - (1 << s1).
8322 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8323 unsigned Shifts = Imm.countr_zero();
8324 // Reject immediates which can be composed via a single LUI.
8325 if (Shifts >= 12)
8326 return false;
8327 // Reject multiplications can be optimized to
8328 // (SLLI (ALSL x, x, 1/2/3/4), s).
8329 APInt ImmPop = Imm.ashr(Shifts);
8330 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8331 return false;
8332 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8333 // since it needs one more instruction than other 3 cases.
8334 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8335 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8336 (ImmSmall - Imm).isPowerOf2())
8337 return true;
8338 }
8339 }
8340
8341 return false;
8342 }
8343
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const8344 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8345 const AddrMode &AM,
8346 Type *Ty, unsigned AS,
8347 Instruction *I) const {
8348 // LoongArch has four basic addressing modes:
8349 // 1. reg
8350 // 2. reg + 12-bit signed offset
8351 // 3. reg + 14-bit signed offset left-shifted by 2
8352 // 4. reg1 + reg2
8353 // TODO: Add more checks after support vector extension.
8354
8355 // No global is ever allowed as a base.
8356 if (AM.BaseGV)
8357 return false;
8358
8359 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8360 // with `UAL` feature.
8361 if (!isInt<12>(AM.BaseOffs) &&
8362 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8363 return false;
8364
8365 switch (AM.Scale) {
8366 case 0:
8367 // "r+i" or just "i", depending on HasBaseReg.
8368 break;
8369 case 1:
8370 // "r+r+i" is not allowed.
8371 if (AM.HasBaseReg && AM.BaseOffs)
8372 return false;
8373 // Otherwise we have "r+r" or "r+i".
8374 break;
8375 case 2:
8376 // "2*r+r" or "2*r+i" is not allowed.
8377 if (AM.HasBaseReg || AM.BaseOffs)
8378 return false;
8379 // Allow "2*r" as "r+r".
8380 break;
8381 default:
8382 return false;
8383 }
8384
8385 return true;
8386 }
8387
isLegalICmpImmediate(int64_t Imm) const8388 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8389 return isInt<12>(Imm);
8390 }
8391
isLegalAddImmediate(int64_t Imm) const8392 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8393 return isInt<12>(Imm);
8394 }
8395
isZExtFree(SDValue Val,EVT VT2) const8396 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8397 // Zexts are free if they can be combined with a load.
8398 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8399 // poorly with type legalization of compares preferring sext.
8400 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8401 EVT MemVT = LD->getMemoryVT();
8402 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8403 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8404 LD->getExtensionType() == ISD::ZEXTLOAD))
8405 return true;
8406 }
8407
8408 return TargetLowering::isZExtFree(Val, VT2);
8409 }
8410
isSExtCheaperThanZExt(EVT SrcVT,EVT DstVT) const8411 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8412 EVT DstVT) const {
8413 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8414 }
8415
signExtendConstant(const ConstantInt * CI) const8416 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8417 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8418 }
8419
hasAndNotCompare(SDValue Y) const8420 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8421 // TODO: Support vectors.
8422 if (Y.getValueType().isVector())
8423 return false;
8424
8425 return !isa<ConstantSDNode>(Y);
8426 }
8427
getExtendForAtomicCmpSwapArg() const8428 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8429 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
8430 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8431 }
8432
shouldSignExtendTypeInLibCall(Type * Ty,bool IsSigned) const8433 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8434 Type *Ty, bool IsSigned) const {
8435 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8436 return true;
8437
8438 return IsSigned;
8439 }
8440
shouldExtendTypeInLibCall(EVT Type) const8441 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8442 // Return false to suppress the unnecessary extensions if the LibCall
8443 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8444 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8445 Type.getSizeInBits() < Subtarget.getGRLen()))
8446 return false;
8447 return true;
8448 }
8449
8450 // memcpy, and other memory intrinsics, typically tries to use wider load/store
8451 // if the source/dest is aligned and the copy size is large enough. We therefore
8452 // want to align such objects passed to memory intrinsics.
shouldAlignPointerArgs(CallInst * CI,unsigned & MinSize,Align & PrefAlign) const8453 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8454 unsigned &MinSize,
8455 Align &PrefAlign) const {
8456 if (!isa<MemIntrinsic>(CI))
8457 return false;
8458
8459 if (Subtarget.is64Bit()) {
8460 MinSize = 8;
8461 PrefAlign = Align(8);
8462 } else {
8463 MinSize = 4;
8464 PrefAlign = Align(4);
8465 }
8466
8467 return true;
8468 }
8469
8470 TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const8471 LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
8472 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
8473 VT.getVectorElementType() != MVT::i1)
8474 return TypeWidenVector;
8475
8476 return TargetLoweringBase::getPreferredVectorAction(VT);
8477 }
8478
splitValueIntoRegisterParts(SelectionDAG & DAG,const SDLoc & DL,SDValue Val,SDValue * Parts,unsigned NumParts,MVT PartVT,std::optional<CallingConv::ID> CC) const8479 bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8480 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8481 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8482 bool IsABIRegCopy = CC.has_value();
8483 EVT ValueVT = Val.getValueType();
8484
8485 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8486 PartVT == MVT::f32) {
8487 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
8488 // nan, and cast to f32.
8489 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
8490 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
8491 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
8492 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
8493 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
8494 Parts[0] = Val;
8495 return true;
8496 }
8497
8498 return false;
8499 }
8500
joinRegisterPartsIntoValue(SelectionDAG & DAG,const SDLoc & DL,const SDValue * Parts,unsigned NumParts,MVT PartVT,EVT ValueVT,std::optional<CallingConv::ID> CC) const8501 SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
8502 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
8503 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
8504 bool IsABIRegCopy = CC.has_value();
8505
8506 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8507 PartVT == MVT::f32) {
8508 SDValue Val = Parts[0];
8509
8510 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
8511 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
8512 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
8513 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
8514 return Val;
8515 }
8516
8517 return SDValue();
8518 }
8519
getRegisterTypeForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT) const8520 MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
8521 CallingConv::ID CC,
8522 EVT VT) const {
8523 // Use f32 to pass f16.
8524 if (VT == MVT::f16 && Subtarget.hasBasicF())
8525 return MVT::f32;
8526
8527 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
8528 }
8529
getNumRegistersForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT) const8530 unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
8531 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
8532 // Use f32 to pass f16.
8533 if (VT == MVT::f16 && Subtarget.hasBasicF())
8534 return 1;
8535
8536 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
8537 }
8538
SimplifyDemandedBitsForTargetNode(SDValue Op,const APInt & OriginalDemandedBits,const APInt & OriginalDemandedElts,KnownBits & Known,TargetLoweringOpt & TLO,unsigned Depth) const8539 bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
8540 SDValue Op, const APInt &OriginalDemandedBits,
8541 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
8542 unsigned Depth) const {
8543 EVT VT = Op.getValueType();
8544 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
8545 unsigned Opc = Op.getOpcode();
8546 switch (Opc) {
8547 default:
8548 break;
8549 case LoongArchISD::VMSKLTZ:
8550 case LoongArchISD::XVMSKLTZ: {
8551 SDValue Src = Op.getOperand(0);
8552 MVT SrcVT = Src.getSimpleValueType();
8553 unsigned SrcBits = SrcVT.getScalarSizeInBits();
8554 unsigned NumElts = SrcVT.getVectorNumElements();
8555
8556 // If we don't need the sign bits at all just return zero.
8557 if (OriginalDemandedBits.countr_zero() >= NumElts)
8558 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
8559
8560 // Only demand the vector elements of the sign bits we need.
8561 APInt KnownUndef, KnownZero;
8562 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
8563 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
8564 TLO, Depth + 1))
8565 return true;
8566
8567 Known.Zero = KnownZero.zext(BitWidth);
8568 Known.Zero.setHighBits(BitWidth - NumElts);
8569
8570 // [X]VMSKLTZ only uses the MSB from each vector element.
8571 KnownBits KnownSrc;
8572 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
8573 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
8574 Depth + 1))
8575 return true;
8576
8577 if (KnownSrc.One[SrcBits - 1])
8578 Known.One.setLowBits(NumElts);
8579 else if (KnownSrc.Zero[SrcBits - 1])
8580 Known.Zero.setLowBits(NumElts);
8581
8582 // Attempt to avoid multi-use ops if we don't need anything from it.
8583 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
8584 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
8585 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
8586 return false;
8587 }
8588 }
8589
8590 return TargetLowering::SimplifyDemandedBitsForTargetNode(
8591 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
8592 }
8593