1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchBaseInfo.h"
21 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicsLoongArch.h"
29 #include "llvm/Support/CodeGen.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
34
35 using namespace llvm;
36
37 #define DEBUG_TYPE "loongarch-isel-lowering"
38
39 STATISTIC(NumTailCalls, "Number of tail calls");
40
41 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
44
45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
74 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75 MVT::i1, Promote);
76
77 setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
78 setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
79 setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
80 setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
81 setOperationAction(ISD::ROTL, GRLenVT, Expand);
82 setOperationAction(ISD::CTPOP, GRLenVT, Expand);
83
84 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85 ISD::JumpTable, ISD::GlobalTLSAddress},
86 GRLenVT, Custom);
87
88 setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
89
90 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
91 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
92 setOperationAction(ISD::VASTART, MVT::Other, Custom);
93 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
94
95 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
96 setOperationAction(ISD::TRAP, MVT::Other, Legal);
97
98 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
99 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
101
102   // Expand bitreverse.i16 with native-width bitrev and shift for now, until
103   // we know which of sll and revb.2h is faster.
104 setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
105 setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
110 setOperationAction(ISD::BSWAP, MVT::i16, Custom);
111
112 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
113 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
114 setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
115 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
116 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
117
118 setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
119 setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
124 setOperationAction(ISD::ADD, MVT::i32, Custom);
125 setOperationAction(ISD::SUB, MVT::i32, Custom);
126 setOperationAction(ISD::SHL, MVT::i32, Custom);
127 setOperationAction(ISD::SRA, MVT::i32, Custom);
128 setOperationAction(ISD::SRL, MVT::i32, Custom);
129 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
130 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
131 setOperationAction(ISD::ROTR, MVT::i32, Custom);
132 setOperationAction(ISD::ROTL, MVT::i32, Custom);
133 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
134 setOperationAction(ISD::CTLZ, MVT::i32, Custom);
135 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
136 setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
137 setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
138 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
139 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
140 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
141
142 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
143 setOperationAction(ISD::BSWAP, MVT::i32, Custom);
144 setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
145 }
146
147 // Set operations for LA32 only.
148
149 if (!Subtarget.is64Bit()) {
150 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
151 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
152 setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
153 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
154 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
155 }
156
157 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
158
159 static const ISD::CondCode FPCCToExpand[] = {
160 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
161 ISD::SETGE, ISD::SETNE, ISD::SETGT};
162
163 // Set operations for 'F' feature.
164
165 if (Subtarget.hasBasicF()) {
166 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
167 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
168 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
169
170 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
171 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
172 setOperationAction(ISD::FMA, MVT::f32, Legal);
173 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
174 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
175 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
176 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
177 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
178 setOperationAction(ISD::FSIN, MVT::f32, Expand);
179 setOperationAction(ISD::FCOS, MVT::f32, Expand);
180 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
181 setOperationAction(ISD::FPOW, MVT::f32, Expand);
182 setOperationAction(ISD::FREM, MVT::f32, Expand);
183 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
184 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
185
186 if (Subtarget.is64Bit())
187 setOperationAction(ISD::FRINT, MVT::f32, Legal);
188
189 if (!Subtarget.hasBasicD()) {
190 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
191 if (Subtarget.is64Bit()) {
192 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
193 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
194 }
195 }
196 }
197
198 // Set operations for 'D' feature.
199
200 if (Subtarget.hasBasicD()) {
201 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
202 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
204 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
205 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
206
207 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
208 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
209 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
210 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
211 setOperationAction(ISD::FMA, MVT::f64, Legal);
212 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
213 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
214 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
215 setOperationAction(ISD::FSIN, MVT::f64, Expand);
216 setOperationAction(ISD::FCOS, MVT::f64, Expand);
217 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
218 setOperationAction(ISD::FPOW, MVT::f64, Expand);
219 setOperationAction(ISD::FREM, MVT::f64, Expand);
220 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
221 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
222
223 if (Subtarget.is64Bit())
224 setOperationAction(ISD::FRINT, MVT::f64, Legal);
225 }
226
227 // Set operations for 'LSX' feature.
228
229 if (Subtarget.hasExtLSX()) {
230 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
231 // Expand all truncating stores and extending loads.
232 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
233 setTruncStoreAction(VT, InnerVT, Expand);
234 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
235 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
236 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
237 }
238 // By default everything must be expanded. Then we will selectively turn
239 // on ones that can be effectively codegen'd.
240 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
241 setOperationAction(Op, VT, Expand);
242 }
243
244 for (MVT VT : LSXVTs) {
245 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
246 setOperationAction(ISD::BITCAST, VT, Legal);
247 setOperationAction(ISD::UNDEF, VT, Legal);
248
249 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
250 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
251 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
252
253 setOperationAction(ISD::SETCC, VT, Legal);
254 setOperationAction(ISD::VSELECT, VT, Legal);
255 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
256 }
257 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
258 setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
259 setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
260 Legal);
261 setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
262 VT, Legal);
263 setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
264 setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
265 setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
266 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
267 setCondCodeAction(
268 {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
269 Expand);
270 }
271 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
272 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
273 setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
274 }
275 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
276 setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
277 setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
278 setOperationAction(ISD::FMA, VT, Legal);
279 setOperationAction(ISD::FSQRT, VT, Legal);
280 setOperationAction(ISD::FNEG, VT, Legal);
281 setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
282 ISD::SETUGE, ISD::SETUGT},
283 VT, Expand);
284 }
285 }
286
287 // Set operations for 'LASX' feature.
288
289 if (Subtarget.hasExtLASX()) {
290 for (MVT VT : LASXVTs) {
291 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
292 setOperationAction(ISD::BITCAST, VT, Legal);
293 setOperationAction(ISD::UNDEF, VT, Legal);
294
295 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
296 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
297 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
298 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
299
300 setOperationAction(ISD::SETCC, VT, Legal);
301 setOperationAction(ISD::VSELECT, VT, Legal);
302 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
303 }
304 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
305 setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
306 setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
307 Legal);
308 setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
309 VT, Legal);
310 setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
311 setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
312 setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
313 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
314 setCondCodeAction(
315 {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
316 Expand);
317 }
318 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
319 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
320 setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
321 }
322 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
323 setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
324 setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
325 setOperationAction(ISD::FMA, VT, Legal);
326 setOperationAction(ISD::FSQRT, VT, Legal);
327 setOperationAction(ISD::FNEG, VT, Legal);
328 setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
329 ISD::SETUGE, ISD::SETUGT},
330 VT, Expand);
331 }
332 }
333
334 // Set DAG combine for LA32 and LA64.
335
336 setTargetDAGCombine(ISD::AND);
337 setTargetDAGCombine(ISD::OR);
338 setTargetDAGCombine(ISD::SRL);
339 setTargetDAGCombine(ISD::SETCC);
340
341 // Set DAG combine for 'LSX' feature.
342
343 if (Subtarget.hasExtLSX())
344 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
345
346 // Compute derived properties from the register classes.
347 computeRegisterProperties(Subtarget.getRegisterInfo());
348
349 setStackPointerRegisterToSaveRestore(LoongArch::R3);
350
351 setBooleanContents(ZeroOrOneBooleanContent);
352 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
353
354 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
355
356 setMinCmpXchgSizeInBits(32);
357
358 // Function alignments.
359 setMinFunctionAlignment(Align(4));
360 // Set preferred alignments.
361 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
362 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
363 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
364 }
365
366 bool LoongArchTargetLowering::isOffsetFoldingLegal(
367 const GlobalAddressSDNode *GA) const {
368 // In order to maximise the opportunity for common subexpression elimination,
369 // keep a separate ADD node for the global address offset instead of folding
370 // it in the global address node. Later peephole optimisations may choose to
371 // fold it back in when profitable.
372 return false;
373 }
374
375 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
376 SelectionDAG &DAG) const {
377 switch (Op.getOpcode()) {
378 case ISD::ATOMIC_FENCE:
379 return lowerATOMIC_FENCE(Op, DAG);
380 case ISD::EH_DWARF_CFA:
381 return lowerEH_DWARF_CFA(Op, DAG);
382 case ISD::GlobalAddress:
383 return lowerGlobalAddress(Op, DAG);
384 case ISD::GlobalTLSAddress:
385 return lowerGlobalTLSAddress(Op, DAG);
386 case ISD::INTRINSIC_WO_CHAIN:
387 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
388 case ISD::INTRINSIC_W_CHAIN:
389 return lowerINTRINSIC_W_CHAIN(Op, DAG);
390 case ISD::INTRINSIC_VOID:
391 return lowerINTRINSIC_VOID(Op, DAG);
392 case ISD::BlockAddress:
393 return lowerBlockAddress(Op, DAG);
394 case ISD::JumpTable:
395 return lowerJumpTable(Op, DAG);
396 case ISD::SHL_PARTS:
397 return lowerShiftLeftParts(Op, DAG);
398 case ISD::SRA_PARTS:
399 return lowerShiftRightParts(Op, DAG, true);
400 case ISD::SRL_PARTS:
401 return lowerShiftRightParts(Op, DAG, false);
402 case ISD::ConstantPool:
403 return lowerConstantPool(Op, DAG);
404 case ISD::FP_TO_SINT:
405 return lowerFP_TO_SINT(Op, DAG);
406 case ISD::BITCAST:
407 return lowerBITCAST(Op, DAG);
408 case ISD::UINT_TO_FP:
409 return lowerUINT_TO_FP(Op, DAG);
410 case ISD::SINT_TO_FP:
411 return lowerSINT_TO_FP(Op, DAG);
412 case ISD::VASTART:
413 return lowerVASTART(Op, DAG);
414 case ISD::FRAMEADDR:
415 return lowerFRAMEADDR(Op, DAG);
416 case ISD::RETURNADDR:
417 return lowerRETURNADDR(Op, DAG);
418 case ISD::WRITE_REGISTER:
419 return lowerWRITE_REGISTER(Op, DAG);
420 case ISD::INSERT_VECTOR_ELT:
421 return lowerINSERT_VECTOR_ELT(Op, DAG);
422 case ISD::EXTRACT_VECTOR_ELT:
423 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
424 case ISD::BUILD_VECTOR:
425 return lowerBUILD_VECTOR(Op, DAG);
426 case ISD::VECTOR_SHUFFLE:
427 return lowerVECTOR_SHUFFLE(Op, DAG);
428 }
429 return SDValue();
430 }
431
432 /// Determine whether a range fits a regular pattern of values.
433 /// This function accounts for the possibility of jumping over the End iterator.
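/// For example, fitsRegularPattern<int>(Begin, 2, End, 0, 2) checks that the
/// elements visited at stride 2 starting from Begin form the sequence
/// <0, 2, 4, ...>; undef entries (-1) are accepted as matching any value.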
434 template <typename ValType>
435 static bool
436 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
437 unsigned CheckStride,
438 typename SmallVectorImpl<ValType>::const_iterator End,
439 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
440 auto &I = Begin;
441
442 while (I != End) {
443 if (*I != -1 && *I != ExpectedIndex)
444 return false;
445 ExpectedIndex += ExpectedIndexStride;
446
447 // Incrementing past End is undefined behaviour so we must increment one
448 // step at a time and check for End at each step.
449 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
450 ; // Empty loop body.
451 }
452 return true;
453 }
454
455 /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
456 ///
457 /// VREPLVEI performs vector broadcast based on an element specified by an
458 /// integer immediate, with its mask being similar to:
459 /// <x, x, x, ...>
460 /// where x is any valid index.
461 ///
462 /// When undef's appear in the mask they are treated as if they were whatever
463 /// value is necessary in order to fit the above form.
464 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
465 MVT VT, SDValue V1, SDValue V2,
466 SelectionDAG &DAG) {
467 int SplatIndex = -1;
468 for (const auto &M : Mask) {
469 if (M != -1) {
470 SplatIndex = M;
471 break;
472 }
473 }
474
475 if (SplatIndex == -1)
476 return DAG.getUNDEF(VT);
477
478 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
479 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
480 APInt Imm(64, SplatIndex);
481 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
482 DAG.getConstant(Imm, DL, MVT::i64));
483 }
484
485 return SDValue();
486 }
487
488 /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
489 ///
490 /// VSHUF4I splits the vector into blocks of four elements, then shuffles these
491 /// elements according to a <4 x i2> constant (encoded as an integer immediate).
492 ///
493 /// It is therefore possible to lower into VSHUF4I when the mask takes the form:
494 /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
495 /// When undef's appear they are treated as if they were whatever value is
496 /// necessary in order to fit the above forms.
497 ///
498 /// For example:
499 /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
500 /// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
501 /// i32 7, i32 6, i32 5, i32 4>
502 /// is lowered to:
503 /// (VSHUF4I_H $v0, $v1, 27)
504 /// where the 27 comes from:
505 /// 3 + (2 << 2) + (1 << 4) + (0 << 6)
506 static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
507 MVT VT, SDValue V1, SDValue V2,
508 SelectionDAG &DAG) {
509
510 // When the size is less than 4, lower cost instructions may be used.
511 if (Mask.size() < 4)
512 return SDValue();
513
514 int SubMask[4] = {-1, -1, -1, -1};
515 for (unsigned i = 0; i < 4; ++i) {
516 for (unsigned j = i; j < Mask.size(); j += 4) {
517 int Idx = Mask[j];
518
519 // Convert from vector index to 4-element subvector index
520 // If an index refers to an element outside of the subvector then give up
521 if (Idx != -1) {
522 Idx -= 4 * (j / 4);
523 if (Idx < 0 || Idx >= 4)
524 return SDValue();
525 }
526
527 // If the mask has an undef, replace it with the current index.
528 // Note that it might still be undef if the current index is also undef
529 if (SubMask[i] == -1)
530 SubMask[i] = Idx;
531 // Check that non-undef values are the same as in the mask. If they
532 // aren't then give up
533 else if (Idx != -1 && Idx != SubMask[i])
534 return SDValue();
535 }
536 }
537
538 // Calculate the immediate. Replace any remaining undefs with zero
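// Iterating from SubMask[3] down to SubMask[0] places SubMask[0] in Imm[1:0],
// SubMask[1] in Imm[3:2], and so on, matching the <4 x i2> encoding above.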
539 APInt Imm(64, 0);
540 for (int i = 3; i >= 0; --i) {
541 int Idx = SubMask[i];
542
543 if (Idx == -1)
544 Idx = 0;
545
546 Imm <<= 2;
547 Imm |= Idx & 0x3;
548 }
549
550 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
551 DAG.getConstant(Imm, DL, MVT::i64));
552 }
553
554 /// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
555 ///
556 /// VPACKEV interleaves the even elements from each vector.
557 ///
558 /// It is possible to lower into VPACKEV when the mask consists of two of the
559 /// following forms interleaved:
560 /// <0, 2, 4, ...>
561 /// <n, n+2, n+4, ...>
562 /// where n is the number of elements in the vector.
563 /// For example:
564 /// <0, 0, 2, 2, 4, 4, ...>
565 /// <0, n, 2, n+2, 4, n+4, ...>
566 ///
567 /// When undef's appear in the mask they are treated as if they were whatever
568 /// value is necessary in order to fit the above forms.
569 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
570 MVT VT, SDValue V1, SDValue V2,
571 SelectionDAG &DAG) {
572
573 const auto &Begin = Mask.begin();
574 const auto &End = Mask.end();
575 SDValue OriV1 = V1, OriV2 = V2;
576
577 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
578 V1 = OriV1;
579 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
580 V1 = OriV2;
581 else
582 return SDValue();
583
584 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
585 V2 = OriV1;
586 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
587 V2 = OriV2;
588 else
589 return SDValue();
590
591 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
592 }
593
594 /// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
595 ///
596 /// VPACKOD interleaves the odd elements from each vector.
597 ///
598 /// It is possible to lower into VPACKOD when the mask consists of two of the
599 /// following forms interleaved:
600 /// <1, 3, 5, ...>
601 /// <n+1, n+3, n+5, ...>
602 /// where n is the number of elements in the vector.
603 /// For example:
604 /// <1, 1, 3, 3, 5, 5, ...>
605 /// <1, n+1, 3, n+3, 5, n+5, ...>
606 ///
607 /// When undef's appear in the mask they are treated as if they were whatever
608 /// value is necessary in order to fit the above forms.
609 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
610 MVT VT, SDValue V1, SDValue V2,
611 SelectionDAG &DAG) {
612
613 const auto &Begin = Mask.begin();
614 const auto &End = Mask.end();
615 SDValue OriV1 = V1, OriV2 = V2;
616
617 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
618 V1 = OriV1;
619 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
620 V1 = OriV2;
621 else
622 return SDValue();
623
624 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
625 V2 = OriV1;
626 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
627 V2 = OriV2;
628 else
629 return SDValue();
630
631 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
632 }
633
634 /// Lower VECTOR_SHUFFLE into VILVH (if possible).
635 ///
636 /// VILVH interleaves consecutive elements from the left (highest-indexed) half
637 /// of each vector.
638 ///
639 /// It is possible to lower into VILVH when the mask consists of two of the
640 /// following forms interleaved:
641 /// <x, x+1, x+2, ...>
642 /// <n+x, n+x+1, n+x+2, ...>
643 /// where n is the number of elements in the vector and x is half n.
644 /// For example:
645 /// <x, x, x+1, x+1, x+2, x+2, ...>
646 /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
647 ///
648 /// When undef's appear in the mask they are treated as if they were whatever
649 /// value is necessary in order to fit the above forms.
650 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
651 MVT VT, SDValue V1, SDValue V2,
652 SelectionDAG &DAG) {
653
654 const auto &Begin = Mask.begin();
655 const auto &End = Mask.end();
656 unsigned HalfSize = Mask.size() / 2;
657 SDValue OriV1 = V1, OriV2 = V2;
658
659 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
660 V1 = OriV1;
661 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
662 V1 = OriV2;
663 else
664 return SDValue();
665
666 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
667 V2 = OriV1;
668 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
669 1))
670 V2 = OriV2;
671 else
672 return SDValue();
673
674 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
675 }
676
677 /// Lower VECTOR_SHUFFLE into VILVL (if possible).
678 ///
679 /// VILVL interleaves consecutive elements from the right (lowest-indexed) half
680 /// of each vector.
681 ///
682 /// It is possible to lower into VILVL when the mask consists of two of the
683 /// following forms interleaved:
684 /// <0, 1, 2, ...>
685 /// <n, n+1, n+2, ...>
686 /// where n is the number of elements in the vector.
687 /// For example:
688 /// <0, 0, 1, 1, 2, 2, ...>
689 /// <0, n, 1, n+1, 2, n+2, ...>
690 ///
691 /// When undef's appear in the mask they are treated as if they were whatever
692 /// value is necessary in order to fit the above forms.
693 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
694 MVT VT, SDValue V1, SDValue V2,
695 SelectionDAG &DAG) {
696
697 const auto &Begin = Mask.begin();
698 const auto &End = Mask.end();
699 SDValue OriV1 = V1, OriV2 = V2;
700
701 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
702 V1 = OriV1;
703 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
704 V1 = OriV2;
705 else
706 return SDValue();
707
708 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
709 V2 = OriV1;
710 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
711 V2 = OriV2;
712 else
713 return SDValue();
714
715 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
716 }
717
718 /// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
719 ///
720 /// VPICKEV copies the even elements of each vector into the result vector.
721 ///
722 /// It is possible to lower into VPICKEV when the mask consists of two of the
723 /// following forms concatenated:
724 /// <0, 2, 4, ...>
725 /// <n, n+2, n+4, ...>
726 /// where n is the number of elements in the vector.
727 /// For example:
728 /// <0, 2, 4, ..., 0, 2, 4, ...>
729 /// <0, 2, 4, ..., n, n+2, n+4, ...>
730 ///
731 /// When undef's appear in the mask they are treated as if they were whatever
732 /// value is necessary in order to fit the above forms.
733 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
734 MVT VT, SDValue V1, SDValue V2,
735 SelectionDAG &DAG) {
736
737 const auto &Begin = Mask.begin();
738 const auto &Mid = Mask.begin() + Mask.size() / 2;
739 const auto &End = Mask.end();
740 SDValue OriV1 = V1, OriV2 = V2;
741
742 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
743 V1 = OriV1;
744 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
745 V1 = OriV2;
746 else
747 return SDValue();
748
749 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
750 V2 = OriV1;
751 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
752 V2 = OriV2;
753
754 else
755 return SDValue();
756
757 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
758 }
759
760 /// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
761 ///
762 /// VPICKOD copies the odd elements of each vector into the result vector.
763 ///
764 /// It is possible to lower into VPICKOD when the mask consists of two of the
765 /// following forms concatenated:
766 /// <1, 3, 5, ...>
767 /// <n+1, n+3, n+5, ...>
768 /// where n is the number of elements in the vector.
769 /// For example:
770 /// <1, 3, 5, ..., 1, 3, 5, ...>
771 /// <1, 3, 5, ..., n+1, n+3, n+5, ...>
772 ///
773 /// When undef's appear in the mask they are treated as if they were whatever
774 /// value is necessary in order to fit the above forms.
775 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
776 MVT VT, SDValue V1, SDValue V2,
777 SelectionDAG &DAG) {
778
779 const auto &Begin = Mask.begin();
780 const auto &Mid = Mask.begin() + Mask.size() / 2;
781 const auto &End = Mask.end();
782 SDValue OriV1 = V1, OriV2 = V2;
783
784 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
785 V1 = OriV1;
786 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
787 V1 = OriV2;
788 else
789 return SDValue();
790
791 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
792 V2 = OriV1;
793 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
794 V2 = OriV2;
795 else
796 return SDValue();
797
798 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
799 }
800
801 /// Lower VECTOR_SHUFFLE into VSHUF.
802 ///
803 /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
804 /// adding it as an operand to the resulting VSHUF.
805 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
806 MVT VT, SDValue V1, SDValue V2,
807 SelectionDAG &DAG) {
808
809 SmallVector<SDValue, 16> Ops;
810 for (auto M : Mask)
811 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
812
813 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
814 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
815
816   // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
817 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
818   // VSHUF concatenates the vectors in a bitwise fashion:
819 // <0b00, 0b01> + <0b10, 0b11> ->
820 // 0b0100 + 0b1110 -> 0b01001110
821 // <0b10, 0b11, 0b00, 0b01>
822 // We must therefore swap the operands to get the correct result.
823 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
824 }
825
826 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
827 ///
828 /// This routine breaks down the specific type of 128-bit shuffle and
829 /// dispatches to the lowering routines accordingly.
830 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
831 SDValue V1, SDValue V2, SelectionDAG &DAG) {
832 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
833 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
834 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
835 "Vector type is unsupported for lsx!");
836 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
837 "Two operands have different types!");
838 assert(VT.getVectorNumElements() == Mask.size() &&
839 "Unexpected mask size for shuffle!");
840 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
841
842 SDValue Result;
843 // TODO: Add more comparison patterns.
844 if (V2.isUndef()) {
845 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
846 return Result;
847 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
848 return Result;
849
850 // TODO: This comment may be enabled in the future to better match the
851 // pattern for instruction selection.
852 /* V2 = V1; */
853 }
854
855 // It is recommended not to change the pattern comparison order for better
856 // performance.
857 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
858 return Result;
859 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
860 return Result;
861 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
862 return Result;
863 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
864 return Result;
865 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
866 return Result;
867 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
868 return Result;
869 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
870 return Result;
871
872 return SDValue();
873 }
874
875 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
876 ///
877 /// It is an XVREPLVEI when the mask is:
878 /// <x, x, x, ..., x+n, x+n, x+n, ...>
879 ///   where the number of x is equal to n and n is half the length of the vector.
880 ///
881 /// When undef's appear in the mask they are treated as if they were whatever
882 /// value is necessary in order to fit the above form.
883 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
884 ArrayRef<int> Mask, MVT VT,
885 SDValue V1, SDValue V2,
886 SelectionDAG &DAG) {
887 int SplatIndex = -1;
888 for (const auto &M : Mask) {
889 if (M != -1) {
890 SplatIndex = M;
891 break;
892 }
893 }
894
895 if (SplatIndex == -1)
896 return DAG.getUNDEF(VT);
897
898 const auto &Begin = Mask.begin();
899 const auto &End = Mask.end();
900 unsigned HalfSize = Mask.size() / 2;
901
902 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
903 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
904 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
905 0)) {
906 APInt Imm(64, SplatIndex);
907 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
908 DAG.getConstant(Imm, DL, MVT::i64));
909 }
910
911 return SDValue();
912 }
913
914 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
915 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
916 MVT VT, SDValue V1, SDValue V2,
917 SelectionDAG &DAG) {
918 // When the size is less than or equal to 4, lower cost instructions may be
919 // used.
920 if (Mask.size() <= 4)
921 return SDValue();
922 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
923 }
924
925 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
926 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
927 MVT VT, SDValue V1, SDValue V2,
928 SelectionDAG &DAG) {
929 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
930 }
931
932 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
933 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
934 MVT VT, SDValue V1, SDValue V2,
935 SelectionDAG &DAG) {
936 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
937 }
938
939 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
940 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
941 MVT VT, SDValue V1, SDValue V2,
942 SelectionDAG &DAG) {
943
944 const auto &Begin = Mask.begin();
945 const auto &End = Mask.end();
946 unsigned HalfSize = Mask.size() / 2;
947 unsigned LeftSize = HalfSize / 2;
948 SDValue OriV1 = V1, OriV2 = V2;
949
950 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
951 1) &&
952 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
953 V1 = OriV1;
954 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
955 Mask.size() + HalfSize - LeftSize, 1) &&
956 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
957 Mask.size() + HalfSize + LeftSize, 1))
958 V1 = OriV2;
959 else
960 return SDValue();
961
962 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
963 1) &&
964 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
965 1))
966 V2 = OriV1;
967 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
968 Mask.size() + HalfSize - LeftSize, 1) &&
969 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
970 Mask.size() + HalfSize + LeftSize, 1))
971 V2 = OriV2;
972 else
973 return SDValue();
974
975 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
976 }
977
978 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
979 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
980 MVT VT, SDValue V1, SDValue V2,
981 SelectionDAG &DAG) {
982
983 const auto &Begin = Mask.begin();
984 const auto &End = Mask.end();
985 unsigned HalfSize = Mask.size() / 2;
986 SDValue OriV1 = V1, OriV2 = V2;
987
988 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
989 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
990 V1 = OriV1;
991 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
992 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
993 Mask.size() + HalfSize, 1))
994 V1 = OriV2;
995 else
996 return SDValue();
997
998 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
999 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1000 V2 = OriV1;
1001 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1002 1) &&
1003 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1004 Mask.size() + HalfSize, 1))
1005 V2 = OriV2;
1006 else
1007 return SDValue();
1008
1009 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1010 }
1011
1012 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1013 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1014 MVT VT, SDValue V1, SDValue V2,
1015 SelectionDAG &DAG) {
1016
1017 const auto &Begin = Mask.begin();
1018 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1019 const auto &Mid = Mask.begin() + Mask.size() / 2;
1020 const auto &RightMid = Mask.end() - Mask.size() / 4;
1021 const auto &End = Mask.end();
1022 unsigned HalfSize = Mask.size() / 2;
1023 SDValue OriV1 = V1, OriV2 = V2;
1024
1025 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1026 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1027 V1 = OriV1;
1028 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1029 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1030 V1 = OriV2;
1031 else
1032 return SDValue();
1033
1034 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1035 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1036 V2 = OriV1;
1037 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1038 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1039 V2 = OriV2;
1040
1041 else
1042 return SDValue();
1043
1044 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1045 }
1046
1047 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1048 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1049 MVT VT, SDValue V1, SDValue V2,
1050 SelectionDAG &DAG) {
1051
1052 const auto &Begin = Mask.begin();
1053 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1054 const auto &Mid = Mask.begin() + Mask.size() / 2;
1055 const auto &RightMid = Mask.end() - Mask.size() / 4;
1056 const auto &End = Mask.end();
1057 unsigned HalfSize = Mask.size() / 2;
1058 SDValue OriV1 = V1, OriV2 = V2;
1059
1060 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1061 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1062 V1 = OriV1;
1063 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1064 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1065 2))
1066 V1 = OriV2;
1067 else
1068 return SDValue();
1069
1070 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1071 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1072 V2 = OriV1;
1073 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1074 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1075 2))
1076 V2 = OriV2;
1077 else
1078 return SDValue();
1079
1080 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1081 }
1082
1083 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1084 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1085 MVT VT, SDValue V1, SDValue V2,
1086 SelectionDAG &DAG) {
1087
1088 int MaskSize = Mask.size();
1089 int HalfSize = Mask.size() / 2;
1090 const auto &Begin = Mask.begin();
1091 const auto &Mid = Mask.begin() + HalfSize;
1092 const auto &End = Mask.end();
1093
1094 // VECTOR_SHUFFLE concatenates the vectors:
1095 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1096 // shuffling ->
1097 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1098 //
1099 // XVSHUF concatenates the vectors:
1100 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1101 // shuffling ->
1102 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1103 SmallVector<SDValue, 8> MaskAlloc;
1104 for (auto it = Begin; it < Mid; it++) {
1105 if (*it < 0) // UNDEF
1106 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1107 else if ((*it >= 0 && *it < HalfSize) ||
1108 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1109 int M = *it < HalfSize ? *it : *it - HalfSize;
1110 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1111 } else
1112 return SDValue();
1113 }
1114 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1115
1116 for (auto it = Mid; it < End; it++) {
1117 if (*it < 0) // UNDEF
1118 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1119 else if ((*it >= HalfSize && *it < MaskSize) ||
1120 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1121 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1122 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1123 } else
1124 return SDValue();
1125 }
1126 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1127
1128 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1129 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1130 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1131 }
1132
1133 /// Shuffle vectors by lane to generate more optimized instructions.
1134 /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1135 ///
1136 /// Therefore, except for the following four cases, all other cases are
1137 /// regarded as cross-lane shuffles, where optimization is relatively limited.
1138 ///
1139 /// - Shuffle high, low lanes of two input vectors
1140 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1141 /// - Shuffle low, high lanes of two input vectors
1142 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1143 /// - Shuffle low, low lanes of two input vectors
1144 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1145 /// - Shuffle high, high lanes of two input vectors
1146 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1147 ///
1148 /// The first case is the closest to LoongArch instructions and the other
1149 /// cases need to be converted to it for processing.
1150 ///
1151 /// This function may modify V1, V2 and Mask
1152 static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1153 MutableArrayRef<int> Mask, MVT VT,
1154 SDValue &V1, SDValue &V2,
1155 SelectionDAG &DAG) {
1156
1157 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1158
1159 int MaskSize = Mask.size();
1160 int HalfSize = Mask.size() / 2;
1161
1162 HalfMaskType preMask = None, postMask = None;
1163
1164 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1165 return M < 0 || (M >= 0 && M < HalfSize) ||
1166 (M >= MaskSize && M < MaskSize + HalfSize);
1167 }))
1168 preMask = HighLaneTy;
1169 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1170 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1171 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1172 }))
1173 preMask = LowLaneTy;
1174
1175 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1176 return M < 0 || (M >= 0 && M < HalfSize) ||
1177 (M >= MaskSize && M < MaskSize + HalfSize);
1178 }))
1179 postMask = HighLaneTy;
1180 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1181 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1182 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1183 }))
1184 postMask = LowLaneTy;
1185
1186   // The first half of the mask is high-lane type and the second half of the
1187   // mask is low-lane type, which is the case closest to the LoongArch
1188   // instructions.
1189   //
1190   // Note: In the LoongArch architecture, the high lane of the mask corresponds
1191   // to the lower 128 bits of the vector register, and the low lane of the mask
1192   // corresponds to the higher 128 bits of the vector register.
1192 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1193 return;
1194 }
1195 if (preMask == LowLaneTy && postMask == HighLaneTy) {
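    // Swap the two 128-bit lanes: XVPERMI.D with immediate 0b01001110 selects
    // the 64-bit elements <2, 3, 0, 1> of the source register.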
1196 V1 = DAG.getBitcast(MVT::v4i64, V1);
1197 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1198 DAG.getConstant(0b01001110, DL, MVT::i64));
1199 V1 = DAG.getBitcast(VT, V1);
1200
1201 if (!V2.isUndef()) {
1202 V2 = DAG.getBitcast(MVT::v4i64, V2);
1203 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1204 DAG.getConstant(0b01001110, DL, MVT::i64));
1205 V2 = DAG.getBitcast(VT, V2);
1206 }
1207
1208 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1209 *it = *it < 0 ? *it : *it - HalfSize;
1210 }
1211 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1212 *it = *it < 0 ? *it : *it + HalfSize;
1213 }
1214 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
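    // Immediate 0b11101110 selects the 64-bit elements <2, 3, 2, 3>, duplicating
    // the higher 128 bits of the register into both lanes.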
1215 V1 = DAG.getBitcast(MVT::v4i64, V1);
1216 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1217 DAG.getConstant(0b11101110, DL, MVT::i64));
1218 V1 = DAG.getBitcast(VT, V1);
1219
1220 if (!V2.isUndef()) {
1221 V2 = DAG.getBitcast(MVT::v4i64, V2);
1222 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1223 DAG.getConstant(0b11101110, DL, MVT::i64));
1224 V2 = DAG.getBitcast(VT, V2);
1225 }
1226
1227 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1228 *it = *it < 0 ? *it : *it - HalfSize;
1229 }
1230 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
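    // Immediate 0b01000100 selects the 64-bit elements <0, 1, 0, 1>, duplicating
    // the lower 128 bits of the register into both lanes.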
1231 V1 = DAG.getBitcast(MVT::v4i64, V1);
1232 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1233 DAG.getConstant(0b01000100, DL, MVT::i64));
1234 V1 = DAG.getBitcast(VT, V1);
1235
1236 if (!V2.isUndef()) {
1237 V2 = DAG.getBitcast(MVT::v4i64, V2);
1238 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1239 DAG.getConstant(0b01000100, DL, MVT::i64));
1240 V2 = DAG.getBitcast(VT, V2);
1241 }
1242
1243 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1244 *it = *it < 0 ? *it : *it + HalfSize;
1245 }
1246 } else { // cross-lane
1247 return;
1248 }
1249 }
1250
1251 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1252 ///
1253 /// This routine breaks down the specific type of 256-bit shuffle and
1254 /// dispatches to the lowering routines accordingly.
1255 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1256 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1257 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1258 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1259 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1260 "Vector type is unsupported for lasx!");
1261 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1262 "Two operands have different types!");
1263 assert(VT.getVectorNumElements() == Mask.size() &&
1264 "Unexpected mask size for shuffle!");
1265 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1266 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1267
1268   // Canonicalize non-cross-lane shuffle vectors.
1269 SmallVector<int> NewMask(Mask);
1270 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1271
1272 SDValue Result;
1273 // TODO: Add more comparison patterns.
1274 if (V2.isUndef()) {
1275 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1276 return Result;
1277 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1278 return Result;
1279
1280 // TODO: This comment may be enabled in the future to better match the
1281 // pattern for instruction selection.
1282 /* V2 = V1; */
1283 }
1284
1285 // It is recommended not to change the pattern comparison order for better
1286 // performance.
1287 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1288 return Result;
1289 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1290 return Result;
1291 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1292 return Result;
1293 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1294 return Result;
1295 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1296 return Result;
1297 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1298 return Result;
1299 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1300 return Result;
1301
1302 return SDValue();
1303 }
1304
1305 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1306 SelectionDAG &DAG) const {
1307 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1308 ArrayRef<int> OrigMask = SVOp->getMask();
1309 SDValue V1 = Op.getOperand(0);
1310 SDValue V2 = Op.getOperand(1);
1311 MVT VT = Op.getSimpleValueType();
1312 int NumElements = VT.getVectorNumElements();
1313 SDLoc DL(Op);
1314
1315 bool V1IsUndef = V1.isUndef();
1316 bool V2IsUndef = V2.isUndef();
1317 if (V1IsUndef && V2IsUndef)
1318 return DAG.getUNDEF(VT);
1319
1320   // When we create a shuffle node we put the UNDEF node as the second operand,
1321 // but in some cases the first operand may be transformed to UNDEF.
1322 // In this case we should just commute the node.
1323 if (V1IsUndef)
1324 return DAG.getCommutedVectorShuffle(*SVOp);
1325
1326 // Check for non-undef masks pointing at an undef vector and make the masks
1327 // undef as well. This makes it easier to match the shuffle based solely on
1328 // the mask.
1329 if (V2IsUndef &&
1330 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1331 SmallVector<int, 8> NewMask(OrigMask);
1332 for (int &M : NewMask)
1333 if (M >= NumElements)
1334 M = -1;
1335 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1336 }
1337
1338 // Check for illegal shuffle mask element index values.
1339 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1340 (void)MaskUpperLimit;
1341 assert(llvm::all_of(OrigMask,
1342 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1343 "Out of bounds shuffle index");
1344
1345 // For each vector width, delegate to a specialized lowering routine.
1346 if (VT.is128BitVector())
1347 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1348
1349 if (VT.is256BitVector())
1350 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1351
1352 return SDValue();
1353 }
1354
1355 static bool isConstantOrUndef(const SDValue Op) {
1356 if (Op->isUndef())
1357 return true;
1358 if (isa<ConstantSDNode>(Op))
1359 return true;
1360 if (isa<ConstantFPSDNode>(Op))
1361 return true;
1362 return false;
1363 }
1364
1365 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1366 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1367 if (isConstantOrUndef(Op->getOperand(i)))
1368 return true;
1369 return false;
1370 }
1371
1372 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1373 SelectionDAG &DAG) const {
1374 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1375 EVT ResTy = Op->getValueType(0);
1376 SDLoc DL(Op);
1377 APInt SplatValue, SplatUndef;
1378 unsigned SplatBitSize;
1379 bool HasAnyUndefs;
1380 bool Is128Vec = ResTy.is128BitVector();
1381 bool Is256Vec = ResTy.is256BitVector();
1382
1383 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1384 (!Subtarget.hasExtLASX() || !Is256Vec))
1385 return SDValue();
1386
1387 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1388 /*MinSplatBits=*/8) &&
1389 SplatBitSize <= 64) {
1390 // We can only cope with 8, 16, 32, or 64-bit elements.
1391 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1392 SplatBitSize != 64)
1393 return SDValue();
1394
1395 EVT ViaVecTy;
1396
1397 switch (SplatBitSize) {
1398 default:
1399 return SDValue();
1400 case 8:
1401 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1402 break;
1403 case 16:
1404 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1405 break;
1406 case 32:
1407 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1408 break;
1409 case 64:
1410 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1411 break;
1412 }
1413
1414 // SelectionDAG::getConstant will promote SplatValue appropriately.
1415 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1416
1417 // Bitcast to the type we originally wanted.
1418 if (ViaVecTy != ResTy)
1419 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1420
1421 return Result;
1422 }
1423
1424 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1425 return Op;
1426
1427 if (!isConstantOrUndefBUILD_VECTOR(Node)) {
1428     // Use INSERT_VECTOR_ELT operations rather than expanding to stores.
1429 // The resulting code is the same length as the expansion, but it doesn't
1430 // use memory operations.
1431 EVT ResTy = Node->getValueType(0);
1432
1433 assert(ResTy.isVector());
1434
1435 unsigned NumElts = ResTy.getVectorNumElements();
1436 SDValue Vector = DAG.getUNDEF(ResTy);
1437 for (unsigned i = 0; i < NumElts; ++i) {
1438 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1439 Node->getOperand(i),
1440 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1441 }
1442 return Vector;
1443 }
1444
1445 return SDValue();
1446 }
1447
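// Extractions with a constant index are left as-is when they can be matched
// directly: 32/64-bit elements, or any element residing in the low half of
// the vector. Everything else returns SDValue() and falls back to the
// default handling.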
1448 SDValue
1449 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1450 SelectionDAG &DAG) const {
1451 EVT VecTy = Op->getOperand(0)->getValueType(0);
1452 SDValue Idx = Op->getOperand(1);
1453 EVT EltTy = VecTy.getVectorElementType();
1454 unsigned NumElts = VecTy.getVectorNumElements();
1455
1456 if (isa<ConstantSDNode>(Idx) &&
1457 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1458 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1459 return Op;
1460
1461 return SDValue();
1462 }
1463
1464 SDValue
1465 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1466 SelectionDAG &DAG) const {
1467 if (isa<ConstantSDNode>(Op->getOperand(2)))
1468 return Op;
1469 return SDValue();
1470 }
1471
1472 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1473 SelectionDAG &DAG) const {
1474 SDLoc DL(Op);
1475 SyncScope::ID FenceSSID =
1476 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1477
1478 // singlethread fences only synchronize with signal handlers on the same
1479 // thread and thus only need to preserve instruction order, not actually
1480 // enforce memory ordering.
1481 if (FenceSSID == SyncScope::SingleThread)
1482 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1483 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1484
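// All other fences are returned unchanged and matched to a full memory
// barrier later during instruction selection (e.g. a plain `dbar`; this is
// an illustrative note, the exact hint value is chosen by the patterns).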
1485 return Op;
1486 }
1487
1488 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1489 SelectionDAG &DAG) const {
1490
1491 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1492 DAG.getContext()->emitError(
1493 "On LA64, only 64-bit registers can be written.");
1494 return Op.getOperand(0);
1495 }
1496
1497 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1498 DAG.getContext()->emitError(
1499 "On LA32, only 32-bit registers can be written.");
1500 return Op.getOperand(0);
1501 }
1502
1503 return Op;
1504 }
1505
1506 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1507 SelectionDAG &DAG) const {
1508 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1509 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1510 "be a constant integer");
1511 return SDValue();
1512 }
1513
1514 MachineFunction &MF = DAG.getMachineFunction();
1515 MF.getFrameInfo().setFrameAddressIsTaken(true);
1516 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1517 EVT VT = Op.getValueType();
1518 SDLoc DL(Op);
1519 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1520 unsigned Depth = Op.getConstantOperandVal(0);
1521 int GRLenInBytes = Subtarget.getGRLen() / 8;
1522
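// Walk up the chain of saved frame pointers. With the frame layout this
// backend emits, the caller's frame pointer is expected to be spilled at
// [fp - 2 * GRLenInBytes], just below the saved return address.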
1523 while (Depth--) {
1524 int Offset = -(GRLenInBytes * 2);
1525 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1526 DAG.getIntPtrConstant(Offset, DL));
1527 FrameAddr =
1528 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1529 }
1530 return FrameAddr;
1531 }
1532
1533 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1534 SelectionDAG &DAG) const {
1535 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1536 return SDValue();
1537
1538 // Only lowering the return address for the current frame is supported.
1539 if (Op.getConstantOperandVal(0) != 0) {
1540 DAG.getContext()->emitError(
1541 "return address can only be determined for the current frame");
1542 return SDValue();
1543 }
1544
1545 MachineFunction &MF = DAG.getMachineFunction();
1546 MF.getFrameInfo().setReturnAddressIsTaken(true);
1547 MVT GRLenVT = Subtarget.getGRLenVT();
1548
1549 // Return the value of the return address register, marking it an implicit
1550 // live-in.
1551 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1552 getRegClassFor(GRLenVT));
1553 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1554 }
1555
1556 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1557 SelectionDAG &DAG) const {
1558 MachineFunction &MF = DAG.getMachineFunction();
1559 auto Size = Subtarget.getGRLen() / 8;
1560 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1561 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1562 }
1563
1564 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1565 SelectionDAG &DAG) const {
1566 MachineFunction &MF = DAG.getMachineFunction();
1567 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1568
1569 SDLoc DL(Op);
1570 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1571 getPointerTy(MF.getDataLayout()));
1572
1573 // vastart just stores the address of the VarArgsFrameIndex slot into the
1574 // memory location argument.
1575 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1576 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1577 MachinePointerInfo(SV));
1578 }
1579
1580 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1581 SelectionDAG &DAG) const {
1582 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1583 !Subtarget.hasBasicD() && "unexpected target features");
1584
1585 SDLoc DL(Op);
1586 SDValue Op0 = Op.getOperand(0);
1587 if (Op0->getOpcode() == ISD::AND) {
1588 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1589 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1590 return Op;
1591 }
1592
1593 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1594 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1595 Op0.getConstantOperandVal(2) == UINT64_C(0))
1596 return Op;
1597
1598 if (Op0.getOpcode() == ISD::AssertZext &&
1599 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1600 return Op;
1601
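// The operand may genuinely need all 64 bits, so fall back to a libcall
// (e.g. __floatundisf for i64 -> f32) instead of assuming the value fits in
// 32 bits.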
1602 EVT OpVT = Op0.getValueType();
1603 EVT RetVT = Op.getValueType();
1604 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1605 MakeLibCallOptions CallOptions;
1606 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1607 SDValue Chain = SDValue();
1608 SDValue Result;
1609 std::tie(Result, Chain) =
1610 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1611 return Result;
1612 }
1613
1614 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1615 SelectionDAG &DAG) const {
1616 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1617 !Subtarget.hasBasicD() && "unexpected target features");
1618
1619 SDLoc DL(Op);
1620 SDValue Op0 = Op.getOperand(0);
1621
1622 if ((Op0.getOpcode() == ISD::AssertSext ||
1623 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
1624 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1625 return Op;
1626
1627 EVT OpVT = Op0.getValueType();
1628 EVT RetVT = Op.getValueType();
1629 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1630 MakeLibCallOptions CallOptions;
1631 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1632 SDValue Chain = SDValue();
1633 SDValue Result;
1634 std::tie(Result, Chain) =
1635 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1636 return Result;
1637 }
1638
1639 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1640 SelectionDAG &DAG) const {
1641
1642 SDLoc DL(Op);
1643 SDValue Op0 = Op.getOperand(0);
1644
1645 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1646 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1647 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1648 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1649 }
1650 return Op;
1651 }
1652
1653 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1654 SelectionDAG &DAG) const {
1655
1656 SDLoc DL(Op);
1657
1658 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1659 !Subtarget.hasBasicD()) {
1660 SDValue Dst =
1661 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
1662 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1663 }
1664
1665 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1666 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
1667 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1668 }
1669
1670 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1671 SelectionDAG &DAG, unsigned Flags) {
1672 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1673 }
1674
1675 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1676 SelectionDAG &DAG, unsigned Flags) {
1677 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1678 Flags);
1679 }
1680
1681 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1682 SelectionDAG &DAG, unsigned Flags) {
1683 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1684 N->getOffset(), Flags);
1685 }
1686
1687 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1688 SelectionDAG &DAG, unsigned Flags) {
1689 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1690 }
1691
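// Materialises the address of a global-like node (global, block address,
// constant pool entry or jump table) according to the active code model,
// using the PseudoLA_* pseudo instructions defined by this backend.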
1692 template <class NodeTy>
1693 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1694 CodeModel::Model M,
1695 bool IsLocal) const {
1696 SDLoc DL(N);
1697 EVT Ty = getPointerTy(DAG.getDataLayout());
1698 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1699 SDValue Load;
1700
1701 switch (M) {
1702 default:
1703 report_fatal_error("Unsupported code model");
1704
1705 case CodeModel::Large: {
1706 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1707
1708 // This is not actually used, but is necessary for successfully matching
1709 // the PseudoLA_*_LARGE nodes.
1710 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1711 if (IsLocal) {
1712 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1713 // eventually becomes the desired 5-insn code sequence.
1714 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1715 Tmp, Addr),
1716 0);
1717 } else {
1718 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1719 // eventually becomes the desired 5-insn code sequence.
1720 Load = SDValue(
1721 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1722 0);
1723 }
1724 break;
1725 }
1726
1727 case CodeModel::Small:
1728 case CodeModel::Medium:
1729 if (IsLocal) {
1730 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1731 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1732 Load = SDValue(
1733 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1734 } else {
1735 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1736 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1737 Load =
1738 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1739 }
1740 }
1741
1742 if (!IsLocal) {
1743 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1744 MachineFunction &MF = DAG.getMachineFunction();
1745 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1746 MachinePointerInfo::getGOT(MF),
1747 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1748 MachineMemOperand::MOInvariant,
1749 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1750 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1751 }
1752
1753 return Load;
1754 }
1755
1756 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1757 SelectionDAG &DAG) const {
1758 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1759 DAG.getTarget().getCodeModel());
1760 }
1761
1762 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1763 SelectionDAG &DAG) const {
1764 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1765 DAG.getTarget().getCodeModel());
1766 }
1767
1768 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1769 SelectionDAG &DAG) const {
1770 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1771 DAG.getTarget().getCodeModel());
1772 }
1773
1774 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1775 SelectionDAG &DAG) const {
1776 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1777 assert(N->getOffset() == 0 && "unexpected offset in global node");
1778 auto CM = DAG.getTarget().getCodeModel();
1779 const GlobalValue *GV = N->getGlobal();
1780
1781 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1782 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1783 CM = *GCM;
1784 }
1785
1786 return getAddr(N, DAG, CM, GV->isDSOLocal());
1787 }
1788
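// Computes the address of a TLS variable for the initial-exec and local-exec
// models: the per-variable offset produced by Opc is added to the thread
// pointer ($tp, i.e. R2).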
1789 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1790 SelectionDAG &DAG,
1791 unsigned Opc, bool UseGOT,
1792 bool Large) const {
1793 SDLoc DL(N);
1794 EVT Ty = getPointerTy(DAG.getDataLayout());
1795 MVT GRLenVT = Subtarget.getGRLenVT();
1796
1797 // This is not actually used, but is necessary for successfully matching the
1798 // PseudoLA_*_LARGE nodes.
1799 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1800 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1801 SDValue Offset = Large
1802 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1803 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1804 if (UseGOT) {
1805 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1806 MachineFunction &MF = DAG.getMachineFunction();
1807 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1808 MachinePointerInfo::getGOT(MF),
1809 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1810 MachineMemOperand::MOInvariant,
1811 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1812 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1813 }
1814
1815 // Add the thread pointer.
1816 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1817 DAG.getRegister(LoongArch::R2, GRLenVT));
1818 }
1819
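// Computes the address of a TLS variable for the general-dynamic and
// local-dynamic models by materialising the GOT entry for the symbol and
// calling __tls_get_addr on it.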
1820 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1821 SelectionDAG &DAG,
1822 unsigned Opc,
1823 bool Large) const {
1824 SDLoc DL(N);
1825 EVT Ty = getPointerTy(DAG.getDataLayout());
1826 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1827
1828 // This is not actually used, but is necessary for successfully matching the
1829 // PseudoLA_*_LARGE nodes.
1830 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1831
1832 // Use a PC-relative addressing mode to access the dynamic GOT address.
1833 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1834 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1835 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1836
1837 // Prepare argument list to generate call.
1838 ArgListTy Args;
1839 ArgListEntry Entry;
1840 Entry.Node = Load;
1841 Entry.Ty = CallTy;
1842 Args.push_back(Entry);
1843
1844 // Setup call to __tls_get_addr.
1845 TargetLowering::CallLoweringInfo CLI(DAG);
1846 CLI.setDebugLoc(DL)
1847 .setChain(DAG.getEntryNode())
1848 .setLibCallee(CallingConv::C, CallTy,
1849 DAG.getExternalSymbol("__tls_get_addr", Ty),
1850 std::move(Args));
1851
1852 return LowerCallTo(CLI).first;
1853 }
1854
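// Computes the address of a TLS variable using a TLS descriptor, by emitting
// the PseudoLA_TLS_DESC_PC{,_LARGE} pseudo for the symbol.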
1855 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1856 SelectionDAG &DAG, unsigned Opc,
1857 bool Large) const {
1858 SDLoc DL(N);
1859 EVT Ty = getPointerTy(DAG.getDataLayout());
1860 const GlobalValue *GV = N->getGlobal();
1861
1862 // This is not actually used, but is necessary for successfully matching the
1863 // PseudoLA_*_LARGE nodes.
1864 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1865
1866 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1867 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1868 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1869 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1870 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1871 }
1872
1873 SDValue
1874 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1875 SelectionDAG &DAG) const {
1876 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1877 CallingConv::GHC)
1878 report_fatal_error("In GHC calling convention TLS is not supported");
1879
1880 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1881 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1882
1883 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1884 assert(N->getOffset() == 0 && "unexpected offset in global node");
1885
1886 if (DAG.getTarget().useEmulatedTLS())
1887 report_fatal_error("the emulated TLS is prohibited",
1888 /*GenCrashDiag=*/false);
1889
1890 bool IsDesc = DAG.getTarget().useTLSDESC();
1891
1892 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1893 case TLSModel::GeneralDynamic:
1894 // In this model, application code calls the dynamic linker function
1895 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1896 // runtime.
1897 if (!IsDesc)
1898 return getDynamicTLSAddr(N, DAG,
1899 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1900 : LoongArch::PseudoLA_TLS_GD,
1901 Large);
1902 break;
1903 case TLSModel::LocalDynamic:
1904 // Same as GeneralDynamic, except for assembly modifiers and relocation
1905 // records.
1906 if (!IsDesc)
1907 return getDynamicTLSAddr(N, DAG,
1908 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1909 : LoongArch::PseudoLA_TLS_LD,
1910 Large);
1911 break;
1912 case TLSModel::InitialExec:
1913 // This model uses the GOT to resolve TLS offsets.
1914 return getStaticTLSAddr(N, DAG,
1915 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1916 : LoongArch::PseudoLA_TLS_IE,
1917 /*UseGOT=*/true, Large);
1918 case TLSModel::LocalExec:
1919 // This model is used when static linking as the TLS offsets are resolved
1920 // during program linking.
1921 //
1922 // This node doesn't need an extra argument for the large code model.
1923 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1924 /*UseGOT=*/false);
1925 }
1926
1927 return getTLSDescAddr(N, DAG,
1928 Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
1929 : LoongArch::PseudoLA_TLS_DESC_PC,
1930 Large);
1931 }
1932
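// Checks whether operand ImmOp of Op fits in an N-bit immediate (signed when
// IsSigned is set). If it does not, an error is emitted and an UNDEF of the
// result type is returned so that lowering can continue; otherwise an empty
// SDValue is returned and the intrinsic is handled by the usual patterns.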
1933 template <unsigned N>
1934 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
1935 SelectionDAG &DAG, bool IsSigned = false) {
1936 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1937 // Check the ImmArg.
1938 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
1939 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
1940 DAG.getContext()->emitError(Op->getOperationName(0) +
1941 ": argument out of range.");
1942 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
1943 }
1944 return SDValue();
1945 }
1946
1947 SDValue
1948 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1949 SelectionDAG &DAG) const {
1950 SDLoc DL(Op);
1951 switch (Op.getConstantOperandVal(0)) {
1952 default:
1953 return SDValue(); // Don't custom lower most intrinsics.
1954 case Intrinsic::thread_pointer: {
1955 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1956 return DAG.getRegister(LoongArch::R2, PtrVT);
1957 }
1958 case Intrinsic::loongarch_lsx_vpickve2gr_d:
1959 case Intrinsic::loongarch_lsx_vpickve2gr_du:
1960 case Intrinsic::loongarch_lsx_vreplvei_d:
1961 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1962 return checkIntrinsicImmArg<1>(Op, 2, DAG);
1963 case Intrinsic::loongarch_lsx_vreplvei_w:
1964 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1965 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1966 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1967 case Intrinsic::loongarch_lasx_xvpickve_d:
1968 case Intrinsic::loongarch_lasx_xvpickve_d_f:
1969 return checkIntrinsicImmArg<2>(Op, 2, DAG);
1970 case Intrinsic::loongarch_lasx_xvinsve0_d:
1971 return checkIntrinsicImmArg<2>(Op, 3, DAG);
1972 case Intrinsic::loongarch_lsx_vsat_b:
1973 case Intrinsic::loongarch_lsx_vsat_bu:
1974 case Intrinsic::loongarch_lsx_vrotri_b:
1975 case Intrinsic::loongarch_lsx_vsllwil_h_b:
1976 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1977 case Intrinsic::loongarch_lsx_vsrlri_b:
1978 case Intrinsic::loongarch_lsx_vsrari_b:
1979 case Intrinsic::loongarch_lsx_vreplvei_h:
1980 case Intrinsic::loongarch_lasx_xvsat_b:
1981 case Intrinsic::loongarch_lasx_xvsat_bu:
1982 case Intrinsic::loongarch_lasx_xvrotri_b:
1983 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1984 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1985 case Intrinsic::loongarch_lasx_xvsrlri_b:
1986 case Intrinsic::loongarch_lasx_xvsrari_b:
1987 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1988 case Intrinsic::loongarch_lasx_xvpickve_w:
1989 case Intrinsic::loongarch_lasx_xvpickve_w_f:
1990 return checkIntrinsicImmArg<3>(Op, 2, DAG);
1991 case Intrinsic::loongarch_lasx_xvinsve0_w:
1992 return checkIntrinsicImmArg<3>(Op, 3, DAG);
1993 case Intrinsic::loongarch_lsx_vsat_h:
1994 case Intrinsic::loongarch_lsx_vsat_hu:
1995 case Intrinsic::loongarch_lsx_vrotri_h:
1996 case Intrinsic::loongarch_lsx_vsllwil_w_h:
1997 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1998 case Intrinsic::loongarch_lsx_vsrlri_h:
1999 case Intrinsic::loongarch_lsx_vsrari_h:
2000 case Intrinsic::loongarch_lsx_vreplvei_b:
2001 case Intrinsic::loongarch_lasx_xvsat_h:
2002 case Intrinsic::loongarch_lasx_xvsat_hu:
2003 case Intrinsic::loongarch_lasx_xvrotri_h:
2004 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2005 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2006 case Intrinsic::loongarch_lasx_xvsrlri_h:
2007 case Intrinsic::loongarch_lasx_xvsrari_h:
2008 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2009 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2010 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2011 case Intrinsic::loongarch_lsx_vsrani_b_h:
2012 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2013 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2014 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2015 case Intrinsic::loongarch_lsx_vssrani_b_h:
2016 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2017 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2018 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2019 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2020 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2021 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2022 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2023 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2024 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2025 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2026 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2027 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2028 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2029 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2030 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2031 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2032 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2033 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2034 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2035 case Intrinsic::loongarch_lsx_vsat_w:
2036 case Intrinsic::loongarch_lsx_vsat_wu:
2037 case Intrinsic::loongarch_lsx_vrotri_w:
2038 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2039 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2040 case Intrinsic::loongarch_lsx_vsrlri_w:
2041 case Intrinsic::loongarch_lsx_vsrari_w:
2042 case Intrinsic::loongarch_lsx_vslei_bu:
2043 case Intrinsic::loongarch_lsx_vslei_hu:
2044 case Intrinsic::loongarch_lsx_vslei_wu:
2045 case Intrinsic::loongarch_lsx_vslei_du:
2046 case Intrinsic::loongarch_lsx_vslti_bu:
2047 case Intrinsic::loongarch_lsx_vslti_hu:
2048 case Intrinsic::loongarch_lsx_vslti_wu:
2049 case Intrinsic::loongarch_lsx_vslti_du:
2050 case Intrinsic::loongarch_lsx_vbsll_v:
2051 case Intrinsic::loongarch_lsx_vbsrl_v:
2052 case Intrinsic::loongarch_lasx_xvsat_w:
2053 case Intrinsic::loongarch_lasx_xvsat_wu:
2054 case Intrinsic::loongarch_lasx_xvrotri_w:
2055 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2056 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2057 case Intrinsic::loongarch_lasx_xvsrlri_w:
2058 case Intrinsic::loongarch_lasx_xvsrari_w:
2059 case Intrinsic::loongarch_lasx_xvslei_bu:
2060 case Intrinsic::loongarch_lasx_xvslei_hu:
2061 case Intrinsic::loongarch_lasx_xvslei_wu:
2062 case Intrinsic::loongarch_lasx_xvslei_du:
2063 case Intrinsic::loongarch_lasx_xvslti_bu:
2064 case Intrinsic::loongarch_lasx_xvslti_hu:
2065 case Intrinsic::loongarch_lasx_xvslti_wu:
2066 case Intrinsic::loongarch_lasx_xvslti_du:
2067 case Intrinsic::loongarch_lasx_xvbsll_v:
2068 case Intrinsic::loongarch_lasx_xvbsrl_v:
2069 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2070 case Intrinsic::loongarch_lsx_vseqi_b:
2071 case Intrinsic::loongarch_lsx_vseqi_h:
2072 case Intrinsic::loongarch_lsx_vseqi_w:
2073 case Intrinsic::loongarch_lsx_vseqi_d:
2074 case Intrinsic::loongarch_lsx_vslei_b:
2075 case Intrinsic::loongarch_lsx_vslei_h:
2076 case Intrinsic::loongarch_lsx_vslei_w:
2077 case Intrinsic::loongarch_lsx_vslei_d:
2078 case Intrinsic::loongarch_lsx_vslti_b:
2079 case Intrinsic::loongarch_lsx_vslti_h:
2080 case Intrinsic::loongarch_lsx_vslti_w:
2081 case Intrinsic::loongarch_lsx_vslti_d:
2082 case Intrinsic::loongarch_lasx_xvseqi_b:
2083 case Intrinsic::loongarch_lasx_xvseqi_h:
2084 case Intrinsic::loongarch_lasx_xvseqi_w:
2085 case Intrinsic::loongarch_lasx_xvseqi_d:
2086 case Intrinsic::loongarch_lasx_xvslei_b:
2087 case Intrinsic::loongarch_lasx_xvslei_h:
2088 case Intrinsic::loongarch_lasx_xvslei_w:
2089 case Intrinsic::loongarch_lasx_xvslei_d:
2090 case Intrinsic::loongarch_lasx_xvslti_b:
2091 case Intrinsic::loongarch_lasx_xvslti_h:
2092 case Intrinsic::loongarch_lasx_xvslti_w:
2093 case Intrinsic::loongarch_lasx_xvslti_d:
2094 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2095 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2096 case Intrinsic::loongarch_lsx_vsrani_h_w:
2097 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2098 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2099 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2100 case Intrinsic::loongarch_lsx_vssrani_h_w:
2101 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2102 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2103 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2104 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2105 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2106 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2107 case Intrinsic::loongarch_lsx_vfrstpi_b:
2108 case Intrinsic::loongarch_lsx_vfrstpi_h:
2109 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2110 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2111 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2112 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2113 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2114 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2115 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2116 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2117 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2118 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2119 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2120 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2121 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2122 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2123 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2124 case Intrinsic::loongarch_lsx_vsat_d:
2125 case Intrinsic::loongarch_lsx_vsat_du:
2126 case Intrinsic::loongarch_lsx_vrotri_d:
2127 case Intrinsic::loongarch_lsx_vsrlri_d:
2128 case Intrinsic::loongarch_lsx_vsrari_d:
2129 case Intrinsic::loongarch_lasx_xvsat_d:
2130 case Intrinsic::loongarch_lasx_xvsat_du:
2131 case Intrinsic::loongarch_lasx_xvrotri_d:
2132 case Intrinsic::loongarch_lasx_xvsrlri_d:
2133 case Intrinsic::loongarch_lasx_xvsrari_d:
2134 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2135 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2136 case Intrinsic::loongarch_lsx_vsrani_w_d:
2137 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2138 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2139 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2140 case Intrinsic::loongarch_lsx_vssrani_w_d:
2141 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2142 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2143 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2144 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2145 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2146 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2147 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2148 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2149 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2150 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2151 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2152 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2153 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2154 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2155 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2156 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2157 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2158 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2159 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2160 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2161 case Intrinsic::loongarch_lsx_vsrani_d_q:
2162 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2163 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2164 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2165 case Intrinsic::loongarch_lsx_vssrani_d_q:
2166 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2167 case Intrinsic::loongarch_lsx_vssrani_du_q:
2168 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2169 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2170 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2171 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2172 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2173 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2174 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2175 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2176 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2177 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2178 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2179 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2180 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2181 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2182 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2183 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2184 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2185 case Intrinsic::loongarch_lsx_vnori_b:
2186 case Intrinsic::loongarch_lsx_vshuf4i_b:
2187 case Intrinsic::loongarch_lsx_vshuf4i_h:
2188 case Intrinsic::loongarch_lsx_vshuf4i_w:
2189 case Intrinsic::loongarch_lasx_xvnori_b:
2190 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2191 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2192 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2193 case Intrinsic::loongarch_lasx_xvpermi_d:
2194 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2195 case Intrinsic::loongarch_lsx_vshuf4i_d:
2196 case Intrinsic::loongarch_lsx_vpermi_w:
2197 case Intrinsic::loongarch_lsx_vbitseli_b:
2198 case Intrinsic::loongarch_lsx_vextrins_b:
2199 case Intrinsic::loongarch_lsx_vextrins_h:
2200 case Intrinsic::loongarch_lsx_vextrins_w:
2201 case Intrinsic::loongarch_lsx_vextrins_d:
2202 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2203 case Intrinsic::loongarch_lasx_xvpermi_w:
2204 case Intrinsic::loongarch_lasx_xvpermi_q:
2205 case Intrinsic::loongarch_lasx_xvbitseli_b:
2206 case Intrinsic::loongarch_lasx_xvextrins_b:
2207 case Intrinsic::loongarch_lasx_xvextrins_h:
2208 case Intrinsic::loongarch_lasx_xvextrins_w:
2209 case Intrinsic::loongarch_lasx_xvextrins_d:
2210 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2211 case Intrinsic::loongarch_lsx_vrepli_b:
2212 case Intrinsic::loongarch_lsx_vrepli_h:
2213 case Intrinsic::loongarch_lsx_vrepli_w:
2214 case Intrinsic::loongarch_lsx_vrepli_d:
2215 case Intrinsic::loongarch_lasx_xvrepli_b:
2216 case Intrinsic::loongarch_lasx_xvrepli_h:
2217 case Intrinsic::loongarch_lasx_xvrepli_w:
2218 case Intrinsic::loongarch_lasx_xvrepli_d:
2219 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2220 case Intrinsic::loongarch_lsx_vldi:
2221 case Intrinsic::loongarch_lasx_xvldi:
2222 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2223 }
2224 }
2225
2226 // Helper function that emits an error message for intrinsics with a chain and
2227 // returns the merge values of an UNDEF and the chain.
2228 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2229 StringRef ErrorMsg,
2230 SelectionDAG &DAG) {
2231 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2232 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2233 SDLoc(Op));
2234 }
2235
2236 SDValue
2237 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2238 SelectionDAG &DAG) const {
2239 SDLoc DL(Op);
2240 MVT GRLenVT = Subtarget.getGRLenVT();
2241 EVT VT = Op.getValueType();
2242 SDValue Chain = Op.getOperand(0);
2243 const StringRef ErrorMsgOOR = "argument out of range";
2244 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2245 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2246
2247 switch (Op.getConstantOperandVal(1)) {
2248 default:
2249 return Op;
2250 case Intrinsic::loongarch_crc_w_b_w:
2251 case Intrinsic::loongarch_crc_w_h_w:
2252 case Intrinsic::loongarch_crc_w_w_w:
2253 case Intrinsic::loongarch_crc_w_d_w:
2254 case Intrinsic::loongarch_crcc_w_b_w:
2255 case Intrinsic::loongarch_crcc_w_h_w:
2256 case Intrinsic::loongarch_crcc_w_w_w:
2257 case Intrinsic::loongarch_crcc_w_d_w:
2258 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2259 case Intrinsic::loongarch_csrrd_w:
2260 case Intrinsic::loongarch_csrrd_d: {
2261 unsigned Imm = Op.getConstantOperandVal(2);
2262 return !isUInt<14>(Imm)
2263 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2264 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2265 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2266 }
2267 case Intrinsic::loongarch_csrwr_w:
2268 case Intrinsic::loongarch_csrwr_d: {
2269 unsigned Imm = Op.getConstantOperandVal(3);
2270 return !isUInt<14>(Imm)
2271 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2272 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2273 {Chain, Op.getOperand(2),
2274 DAG.getConstant(Imm, DL, GRLenVT)});
2275 }
2276 case Intrinsic::loongarch_csrxchg_w:
2277 case Intrinsic::loongarch_csrxchg_d: {
2278 unsigned Imm = Op.getConstantOperandVal(4);
2279 return !isUInt<14>(Imm)
2280 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2281 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2282 {Chain, Op.getOperand(2), Op.getOperand(3),
2283 DAG.getConstant(Imm, DL, GRLenVT)});
2284 }
2285 case Intrinsic::loongarch_iocsrrd_d: {
2286 return DAG.getNode(
2287 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2288 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2289 }
2290 #define IOCSRRD_CASE(NAME, NODE) \
2291 case Intrinsic::loongarch_##NAME: { \
2292 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2293 {Chain, Op.getOperand(2)}); \
2294 }
2295 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2296 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2297 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2298 #undef IOCSRRD_CASE
2299 case Intrinsic::loongarch_cpucfg: {
2300 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2301 {Chain, Op.getOperand(2)});
2302 }
2303 case Intrinsic::loongarch_lddir_d: {
2304 unsigned Imm = Op.getConstantOperandVal(3);
2305 return !isUInt<8>(Imm)
2306 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2307 : Op;
2308 }
2309 case Intrinsic::loongarch_movfcsr2gr: {
2310 if (!Subtarget.hasBasicF())
2311 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2312 unsigned Imm = Op.getConstantOperandVal(2);
2313 return !isUInt<2>(Imm)
2314 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2315 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2316 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2317 }
2318 case Intrinsic::loongarch_lsx_vld:
2319 case Intrinsic::loongarch_lsx_vldrepl_b:
2320 case Intrinsic::loongarch_lasx_xvld:
2321 case Intrinsic::loongarch_lasx_xvldrepl_b:
2322 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2323 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2324 : SDValue();
2325 case Intrinsic::loongarch_lsx_vldrepl_h:
2326 case Intrinsic::loongarch_lasx_xvldrepl_h:
2327 return !isShiftedInt<11, 1>(
2328 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2329 ? emitIntrinsicWithChainErrorMessage(
2330 Op, "argument out of range or not a multiple of 2", DAG)
2331 : SDValue();
2332 case Intrinsic::loongarch_lsx_vldrepl_w:
2333 case Intrinsic::loongarch_lasx_xvldrepl_w:
2334 return !isShiftedInt<10, 2>(
2335 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2336 ? emitIntrinsicWithChainErrorMessage(
2337 Op, "argument out of range or not a multiple of 4", DAG)
2338 : SDValue();
2339 case Intrinsic::loongarch_lsx_vldrepl_d:
2340 case Intrinsic::loongarch_lasx_xvldrepl_d:
2341 return !isShiftedInt<9, 3>(
2342 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2343 ? emitIntrinsicWithChainErrorMessage(
2344 Op, "argument out of range or not a multiple of 8", DAG)
2345 : SDValue();
2346 }
2347 }
2348
2349 // Helper function that emits an error message for intrinsics with a void
2350 // return value and returns the chain.
2351 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2352 SelectionDAG &DAG) {
2353
2354 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2355 return Op.getOperand(0);
2356 }
2357
2358 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2359 SelectionDAG &DAG) const {
2360 SDLoc DL(Op);
2361 MVT GRLenVT = Subtarget.getGRLenVT();
2362 SDValue Chain = Op.getOperand(0);
2363 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2364 SDValue Op2 = Op.getOperand(2);
2365 const StringRef ErrorMsgOOR = "argument out of range";
2366 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2367 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2368 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2369
2370 switch (IntrinsicEnum) {
2371 default:
2372 // TODO: Add more Intrinsics.
2373 return SDValue();
2374 case Intrinsic::loongarch_cacop_d:
2375 case Intrinsic::loongarch_cacop_w: {
2376 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2377 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2378 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2379 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2380 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2381 unsigned Imm1 = Op2->getAsZExtVal();
2382 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2383 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2384 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2385 return Op;
2386 }
2387 case Intrinsic::loongarch_dbar: {
2388 unsigned Imm = Op2->getAsZExtVal();
2389 return !isUInt<15>(Imm)
2390 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2391 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2392 DAG.getConstant(Imm, DL, GRLenVT));
2393 }
2394 case Intrinsic::loongarch_ibar: {
2395 unsigned Imm = Op2->getAsZExtVal();
2396 return !isUInt<15>(Imm)
2397 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2398 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2399 DAG.getConstant(Imm, DL, GRLenVT));
2400 }
2401 case Intrinsic::loongarch_break: {
2402 unsigned Imm = Op2->getAsZExtVal();
2403 return !isUInt<15>(Imm)
2404 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2405 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2406 DAG.getConstant(Imm, DL, GRLenVT));
2407 }
2408 case Intrinsic::loongarch_movgr2fcsr: {
2409 if (!Subtarget.hasBasicF())
2410 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2411 unsigned Imm = Op2->getAsZExtVal();
2412 return !isUInt<2>(Imm)
2413 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2414 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2415 DAG.getConstant(Imm, DL, GRLenVT),
2416 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2417 Op.getOperand(3)));
2418 }
2419 case Intrinsic::loongarch_syscall: {
2420 unsigned Imm = Op2->getAsZExtVal();
2421 return !isUInt<15>(Imm)
2422 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2423 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2424 DAG.getConstant(Imm, DL, GRLenVT));
2425 }
2426 #define IOCSRWR_CASE(NAME, NODE) \
2427 case Intrinsic::loongarch_##NAME: { \
2428 SDValue Op3 = Op.getOperand(3); \
2429 return Subtarget.is64Bit() \
2430 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2432 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2433 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2434 Op3); \
2435 }
2436 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2437 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2438 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2439 #undef IOCSRWR_CASE
2440 case Intrinsic::loongarch_iocsrwr_d: {
2441 return !Subtarget.is64Bit()
2442 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2443 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2444 Op2,
2445 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2446 Op.getOperand(3)));
2447 }
2448 #define ASRT_LE_GT_CASE(NAME) \
2449 case Intrinsic::loongarch_##NAME: { \
2450 return !Subtarget.is64Bit() \
2451 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2452 : Op; \
2453 }
2454 ASRT_LE_GT_CASE(asrtle_d)
2455 ASRT_LE_GT_CASE(asrtgt_d)
2456 #undef ASRT_LE_GT_CASE
2457 case Intrinsic::loongarch_ldpte_d: {
2458 unsigned Imm = Op.getConstantOperandVal(3);
2459 return !Subtarget.is64Bit()
2460 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2461 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2462 : Op;
2463 }
2464 case Intrinsic::loongarch_lsx_vst:
2465 case Intrinsic::loongarch_lasx_xvst:
2466 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2467 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2468 : SDValue();
2469 case Intrinsic::loongarch_lasx_xvstelm_b:
2470 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2471 !isUInt<5>(Op.getConstantOperandVal(5)))
2472 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2473 : SDValue();
2474 case Intrinsic::loongarch_lsx_vstelm_b:
2475 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2476 !isUInt<4>(Op.getConstantOperandVal(5)))
2477 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2478 : SDValue();
2479 case Intrinsic::loongarch_lasx_xvstelm_h:
2480 return (!isShiftedInt<8, 1>(
2481 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2482 !isUInt<4>(Op.getConstantOperandVal(5)))
2483 ? emitIntrinsicErrorMessage(
2484 Op, "argument out of range or not a multiple of 2", DAG)
2485 : SDValue();
2486 case Intrinsic::loongarch_lsx_vstelm_h:
2487 return (!isShiftedInt<8, 1>(
2488 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2489 !isUInt<3>(Op.getConstantOperandVal(5)))
2490 ? emitIntrinsicErrorMessage(
2491 Op, "argument out of range or not a multiple of 2", DAG)
2492 : SDValue();
2493 case Intrinsic::loongarch_lasx_xvstelm_w:
2494 return (!isShiftedInt<8, 2>(
2495 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2496 !isUInt<3>(Op.getConstantOperandVal(5)))
2497 ? emitIntrinsicErrorMessage(
2498 Op, "argument out of range or not a multiple of 4", DAG)
2499 : SDValue();
2500 case Intrinsic::loongarch_lsx_vstelm_w:
2501 return (!isShiftedInt<8, 2>(
2502 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2503 !isUInt<2>(Op.getConstantOperandVal(5)))
2504 ? emitIntrinsicErrorMessage(
2505 Op, "argument out of range or not a multiple of 4", DAG)
2506 : SDValue();
2507 case Intrinsic::loongarch_lasx_xvstelm_d:
2508 return (!isShiftedInt<8, 3>(
2509 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2510 !isUInt<2>(Op.getConstantOperandVal(5)))
2511 ? emitIntrinsicErrorMessage(
2512 Op, "argument out of range or not a multiple of 8", DAG)
2513 : SDValue();
2514 case Intrinsic::loongarch_lsx_vstelm_d:
2515 return (!isShiftedInt<8, 3>(
2516 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2517 !isUInt<1>(Op.getConstantOperandVal(5)))
2518 ? emitIntrinsicErrorMessage(
2519 Op, "argument out of range or not a multiple of 8", DAG)
2520 : SDValue();
2521 }
2522 }
2523
2524 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2525 SelectionDAG &DAG) const {
2526 SDLoc DL(Op);
2527 SDValue Lo = Op.getOperand(0);
2528 SDValue Hi = Op.getOperand(1);
2529 SDValue Shamt = Op.getOperand(2);
2530 EVT VT = Lo.getValueType();
2531
2532 // if Shamt-GRLen < 0: // Shamt < GRLen
2533 // Lo = Lo << Shamt
2534 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2535 // else:
2536 // Lo = 0
2537 // Hi = Lo << (Shamt-GRLen)
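//
// Note that ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) computes Lo >>u (GRLen-Shamt)
// without ever using a shift amount of GRLen (which would be undefined),
// since GRLen-1 ^ Shamt equals GRLen-1-Shamt whenever Shamt < GRLen.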
2538
2539 SDValue Zero = DAG.getConstant(0, DL, VT);
2540 SDValue One = DAG.getConstant(1, DL, VT);
2541 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2542 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2543 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2544 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2545
2546 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2547 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2548 SDValue ShiftRightLo =
2549 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2550 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2551 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2552 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2553
2554 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2555
2556 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2557 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2558
2559 SDValue Parts[2] = {Lo, Hi};
2560 return DAG.getMergeValues(Parts, DL);
2561 }
2562
2563 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2564 SelectionDAG &DAG,
2565 bool IsSRA) const {
2566 SDLoc DL(Op);
2567 SDValue Lo = Op.getOperand(0);
2568 SDValue Hi = Op.getOperand(1);
2569 SDValue Shamt = Op.getOperand(2);
2570 EVT VT = Lo.getValueType();
2571
2572 // SRA expansion:
2573 // if Shamt-GRLen < 0: // Shamt < GRLen
2574 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2575 // Hi = Hi >>s Shamt
2576 // else:
2577 // Lo = Hi >>s (Shamt-GRLen);
2578 // Hi = Hi >>s (GRLen-1)
2579 //
2580 // SRL expansion:
2581 // if Shamt-GRLen < 0: // Shamt < GRLen
2582 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2583 // Hi = Hi >>u Shamt
2584 // else:
2585 // Lo = Hi >>u (Shamt-GRLen);
2586 // Hi = 0;
2587
2588 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2589
2590 SDValue Zero = DAG.getConstant(0, DL, VT);
2591 SDValue One = DAG.getConstant(1, DL, VT);
2592 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2593 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2594 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2595 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2596
2597 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2598 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2599 SDValue ShiftLeftHi =
2600 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2601 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2602 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2603 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2604 SDValue HiFalse =
2605 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2606
2607 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2608
2609 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2610 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2611
2612 SDValue Parts[2] = {Lo, Hi};
2613 return DAG.getMergeValues(Parts, DL);
2614 }
2615
2616 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2617 // form of the given Opcode.
2618 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2619 switch (Opcode) {
2620 default:
2621 llvm_unreachable("Unexpected opcode");
2622 case ISD::UDIV:
2623 return LoongArchISD::DIV_WU;
2624 case ISD::UREM:
2625 return LoongArchISD::MOD_WU;
2626 case ISD::SHL:
2627 return LoongArchISD::SLL_W;
2628 case ISD::SRA:
2629 return LoongArchISD::SRA_W;
2630 case ISD::SRL:
2631 return LoongArchISD::SRL_W;
2632 case ISD::ROTL:
2633 case ISD::ROTR:
2634 return LoongArchISD::ROTR_W;
2635 case ISD::CTTZ:
2636 return LoongArchISD::CTZ_W;
2637 case ISD::CTLZ:
2638 return LoongArchISD::CLZ_W;
2639 }
2640 }
2641
2642 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2643 // node. Because i8/i16/i32 are not legal types for LA64, these operations
2644 // would otherwise be promoted to i64, making it difficult to select the
2645 // SLL_W/.../*W nodes later on, because the fact that the operation was
2646 // originally of type i8/i16/i32 is lost.
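// For example, an illegal i32 SRA whose result is being replaced here is
// rebuilt as (trunc (SRA_W (any_ext lhs), (any_ext rhs))), which can then be
// matched directly to sra.w.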
2647 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2648 unsigned ExtOpc = ISD::ANY_EXTEND) {
2649 SDLoc DL(N);
2650 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2651 SDValue NewOp0, NewRes;
2652
2653 switch (NumOp) {
2654 default:
2655 llvm_unreachable("Unexpected NumOp");
2656 case 1: {
2657 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2658 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2659 break;
2660 }
2661 case 2: {
2662 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2663 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2664 if (N->getOpcode() == ISD::ROTL) {
2665 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2666 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2667 }
2668 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2669 break;
2670 }
2671 // TODO: Handle more NumOp values.
2672 }
2673
2674 // ReplaceNodeResults requires we maintain the same type for the return
2675 // value.
2676 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2677 }
2678
2679 // Converts the given 32-bit operation to an i64 operation with sign-extension
2680 // semantics, in order to reduce the number of sign-extension instructions.
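// For example, an i32 ADD is rebuilt as
//   (trunc (sext_inreg (add (any_ext a), (any_ext b)), i32))
// so that the sign extension can typically be folded into a single add.w
// during instruction selection.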
2681 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2682 SDLoc DL(N);
2683 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2684 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2685 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2686 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2687 DAG.getValueType(MVT::i32));
2688 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2689 }
2690
2691 // Helper function that emits an error message for intrinsics with or without
2692 // a chain, and returns an UNDEF (plus the chain, if present) as the results.
2693 static void emitErrorAndReplaceIntrinsicResults(
2694 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2695 StringRef ErrorMsg, bool WithChain = true) {
2696 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2697 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2698 if (!WithChain)
2699 return;
2700 Results.push_back(N->getOperand(0));
2701 }
2702
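// Replaces the result of an lsx/lasx vpickve2gr intrinsic whose return type
// needs legalisation with a VPICK_SEXT_ELT/VPICK_ZEXT_ELT node on GRLenVT,
// after checking that the lane index fits in an N-bit unsigned immediate.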
2703 template <unsigned N>
2704 static void
2705 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2706 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2707 unsigned ResOp) {
2708 const StringRef ErrorMsgOOR = "argument out of range";
2709 unsigned Imm = Node->getConstantOperandVal(2);
2710 if (!isUInt<N>(Imm)) {
2711 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2712 /*WithChain=*/false);
2713 return;
2714 }
2715 SDLoc DL(Node);
2716 SDValue Vec = Node->getOperand(1);
2717
2718 SDValue PickElt =
2719 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2720 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2721 DAG.getValueType(Vec.getValueType().getVectorElementType()));
2722 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2723 PickElt.getValue(0)));
2724 }
2725
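// Replaces the result of an lsx/lasx vector-condition intrinsic (the bz/bnz
// family) with the corresponding VALL_ZERO/VANY_ZERO/VALL_NONZERO/
// VANY_NONZERO node on GRLenVT, truncated back to the original result type.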
2726 static void replaceVecCondBranchResults(SDNode *N,
2727 SmallVectorImpl<SDValue> &Results,
2728 SelectionDAG &DAG,
2729 const LoongArchSubtarget &Subtarget,
2730 unsigned ResOp) {
2731 SDLoc DL(N);
2732 SDValue Vec = N->getOperand(1);
2733
2734 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2735 Results.push_back(
2736 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2737 }
2738
2739 static void
2740 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2741 SelectionDAG &DAG,
2742 const LoongArchSubtarget &Subtarget) {
2743 switch (N->getConstantOperandVal(0)) {
2744 default:
2745 llvm_unreachable("Unexpected Intrinsic.");
2746 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2747 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2748 LoongArchISD::VPICK_SEXT_ELT);
2749 break;
2750 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2751 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2752 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2753 LoongArchISD::VPICK_SEXT_ELT);
2754 break;
2755 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2756 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2757 LoongArchISD::VPICK_SEXT_ELT);
2758 break;
2759 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2760 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2761 LoongArchISD::VPICK_ZEXT_ELT);
2762 break;
2763 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2764 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2765 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2766 LoongArchISD::VPICK_ZEXT_ELT);
2767 break;
2768 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2769 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2770 LoongArchISD::VPICK_ZEXT_ELT);
2771 break;
2772 case Intrinsic::loongarch_lsx_bz_b:
2773 case Intrinsic::loongarch_lsx_bz_h:
2774 case Intrinsic::loongarch_lsx_bz_w:
2775 case Intrinsic::loongarch_lsx_bz_d:
2776 case Intrinsic::loongarch_lasx_xbz_b:
2777 case Intrinsic::loongarch_lasx_xbz_h:
2778 case Intrinsic::loongarch_lasx_xbz_w:
2779 case Intrinsic::loongarch_lasx_xbz_d:
2780 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2781 LoongArchISD::VALL_ZERO);
2782 break;
2783 case Intrinsic::loongarch_lsx_bz_v:
2784 case Intrinsic::loongarch_lasx_xbz_v:
2785 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2786 LoongArchISD::VANY_ZERO);
2787 break;
2788 case Intrinsic::loongarch_lsx_bnz_b:
2789 case Intrinsic::loongarch_lsx_bnz_h:
2790 case Intrinsic::loongarch_lsx_bnz_w:
2791 case Intrinsic::loongarch_lsx_bnz_d:
2792 case Intrinsic::loongarch_lasx_xbnz_b:
2793 case Intrinsic::loongarch_lasx_xbnz_h:
2794 case Intrinsic::loongarch_lasx_xbnz_w:
2795 case Intrinsic::loongarch_lasx_xbnz_d:
2796 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2797 LoongArchISD::VALL_NONZERO);
2798 break;
2799 case Intrinsic::loongarch_lsx_bnz_v:
2800 case Intrinsic::loongarch_lasx_xbnz_v:
2801 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2802 LoongArchISD::VANY_NONZERO);
2803 break;
2804 }
2805 }
2806
2807 void LoongArchTargetLowering::ReplaceNodeResults(
2808 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2809 SDLoc DL(N);
2810 EVT VT = N->getValueType(0);
2811 switch (N->getOpcode()) {
2812 default:
2813 llvm_unreachable("Don't know how to legalize this operation");
2814 case ISD::ADD:
2815 case ISD::SUB:
2816 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2817 "Unexpected custom legalisation");
2818 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2819 break;
2820 case ISD::UDIV:
2821 case ISD::UREM:
2822 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2823 "Unexpected custom legalisation");
2824 Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
2825 break;
2826 case ISD::SHL:
2827 case ISD::SRA:
2828 case ISD::SRL:
2829 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2830 "Unexpected custom legalisation");
2831 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2832 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2833 break;
2834 }
2835 break;
2836 case ISD::ROTL:
2837 case ISD::ROTR:
2838 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2839 "Unexpected custom legalisation");
2840 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2841 break;
2842 case ISD::FP_TO_SINT: {
2843 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2844 "Unexpected custom legalisation");
2845 SDValue Src = N->getOperand(0);
2846 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2847 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2848 TargetLowering::TypeSoftenFloat) {
2849 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2850 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2851 return;
2852 }
2853 // If the FP type needs to be softened, emit a library call using the 'si'
2854 // version. If we left it to default legalization we'd end up with 'di'.
2855 RTLIB::Libcall LC;
2856 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2857 MakeLibCallOptions CallOptions;
2858 EVT OpVT = Src.getValueType();
2859 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2860 SDValue Chain = SDValue();
2861 SDValue Result;
2862 std::tie(Result, Chain) =
2863 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2864 Results.push_back(Result);
2865 break;
2866 }
2867 case ISD::BITCAST: {
2868 SDValue Src = N->getOperand(0);
2869 EVT SrcVT = Src.getValueType();
2870 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2871 Subtarget.hasBasicF()) {
2872 SDValue Dst =
2873 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2874 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2875 }
2876 break;
2877 }
2878 case ISD::FP_TO_UINT: {
2879 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2880 "Unexpected custom legalisation");
2881 auto &TLI = DAG.getTargetLoweringInfo();
2882 SDValue Tmp1, Tmp2;
2883 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2884 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2885 break;
2886 }
2887 case ISD::BSWAP: {
2888 SDValue Src = N->getOperand(0);
2889 assert((VT == MVT::i16 || VT == MVT::i32) &&
2890 "Unexpected custom legalization");
2891 MVT GRLenVT = Subtarget.getGRLenVT();
2892 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2893 SDValue Tmp;
2894 switch (VT.getSizeInBits()) {
2895 default:
2896 llvm_unreachable("Unexpected operand width");
2897 case 16:
2898 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2899 break;
2900 case 32:
2901 // Only LA64 will get here due to the size mismatch between VT and
2902 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
2903 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2904 break;
2905 }
2906 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2907 break;
2908 }
2909 case ISD::BITREVERSE: {
2910 SDValue Src = N->getOperand(0);
2911 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2912 "Unexpected custom legalization");
2913 MVT GRLenVT = Subtarget.getGRLenVT();
2914 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2915 SDValue Tmp;
2916 switch (VT.getSizeInBits()) {
2917 default:
2918 llvm_unreachable("Unexpected operand width");
2919 case 8:
2920 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
2921 break;
2922 case 32:
2923 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
2924 break;
2925 }
2926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2927 break;
2928 }
2929 case ISD::CTLZ:
2930 case ISD::CTTZ: {
2931 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2932 "Unexpected custom legalisation");
2933 Results.push_back(customLegalizeToWOp(N, DAG, 1));
2934 break;
2935 }
2936 case ISD::INTRINSIC_W_CHAIN: {
2937 SDValue Chain = N->getOperand(0);
2938 SDValue Op2 = N->getOperand(2);
2939 MVT GRLenVT = Subtarget.getGRLenVT();
2940 const StringRef ErrorMsgOOR = "argument out of range";
2941 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2942 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2943
2944 switch (N->getConstantOperandVal(1)) {
2945 default:
2946 llvm_unreachable("Unexpected Intrinsic.");
2947 case Intrinsic::loongarch_movfcsr2gr: {
2948 if (!Subtarget.hasBasicF()) {
2949 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
2950 return;
2951 }
2952 unsigned Imm = Op2->getAsZExtVal();
2953 if (!isUInt<2>(Imm)) {
2954 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2955 return;
2956 }
2957 SDValue MOVFCSR2GRResults = DAG.getNode(
2958 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
2959 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2960 Results.push_back(
2961 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
2962 Results.push_back(MOVFCSR2GRResults.getValue(1));
2963 break;
2964 }
2965 #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
2966 case Intrinsic::loongarch_##NAME: { \
2967 SDValue NODE = DAG.getNode( \
2968 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2969 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2970 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2971 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2972 Results.push_back(NODE.getValue(1)); \
2973 break; \
2974 }
2975 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
2976 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
2977 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
2978 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
2979 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
2980 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
2981 #undef CRC_CASE_EXT_BINARYOP
2982
2983 #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2984 case Intrinsic::loongarch_##NAME: { \
2985 SDValue NODE = DAG.getNode( \
2986 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2987 {Chain, Op2, \
2988 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2989 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2990 Results.push_back(NODE.getValue(1)); \
2991 break; \
2992 }
2993 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2994 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2995 #undef CRC_CASE_EXT_UNARYOP
2996 #define CSR_CASE(ID) \
2997 case Intrinsic::loongarch_##ID: { \
2998 if (!Subtarget.is64Bit()) \
2999 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3000 break; \
3001 }
3002 CSR_CASE(csrrd_d);
3003 CSR_CASE(csrwr_d);
3004 CSR_CASE(csrxchg_d);
3005 CSR_CASE(iocsrrd_d);
3006 #undef CSR_CASE
3007 case Intrinsic::loongarch_csrrd_w: {
3008 unsigned Imm = Op2->getAsZExtVal();
3009 if (!isUInt<14>(Imm)) {
3010 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3011 return;
3012 }
3013 SDValue CSRRDResults =
3014 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3015 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3016 Results.push_back(
3017 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3018 Results.push_back(CSRRDResults.getValue(1));
3019 break;
3020 }
3021 case Intrinsic::loongarch_csrwr_w: {
3022 unsigned Imm = N->getConstantOperandVal(3);
3023 if (!isUInt<14>(Imm)) {
3024 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3025 return;
3026 }
3027 SDValue CSRWRResults =
3028 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3029 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3030 DAG.getConstant(Imm, DL, GRLenVT)});
3031 Results.push_back(
3032 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3033 Results.push_back(CSRWRResults.getValue(1));
3034 break;
3035 }
3036 case Intrinsic::loongarch_csrxchg_w: {
3037 unsigned Imm = N->getConstantOperandVal(4);
3038 if (!isUInt<14>(Imm)) {
3039 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3040 return;
3041 }
3042 SDValue CSRXCHGResults = DAG.getNode(
3043 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3044 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3045 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3046 DAG.getConstant(Imm, DL, GRLenVT)});
3047 Results.push_back(
3048 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3049 Results.push_back(CSRXCHGResults.getValue(1));
3050 break;
3051 }
3052 #define IOCSRRD_CASE(NAME, NODE) \
3053 case Intrinsic::loongarch_##NAME: { \
3054 SDValue IOCSRRDResults = \
3055 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3056 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3057 Results.push_back( \
3058 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3059 Results.push_back(IOCSRRDResults.getValue(1)); \
3060 break; \
3061 }
3062 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3063 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3064 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3065 #undef IOCSRRD_CASE
3066 case Intrinsic::loongarch_cpucfg: {
3067 SDValue CPUCFGResults =
3068 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3069 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3070 Results.push_back(
3071 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3072 Results.push_back(CPUCFGResults.getValue(1));
3073 break;
3074 }
3075 case Intrinsic::loongarch_lddir_d: {
3076 if (!Subtarget.is64Bit()) {
3077 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3078 return;
3079 }
3080 break;
3081 }
3082 }
3083 break;
3084 }
3085 case ISD::READ_REGISTER: {
3086 if (Subtarget.is64Bit())
3087 DAG.getContext()->emitError(
3088 "On LA64, only 64-bit registers can be read.");
3089 else
3090 DAG.getContext()->emitError(
3091 "On LA32, only 32-bit registers can be read.");
3092 Results.push_back(DAG.getUNDEF(VT));
3093 Results.push_back(N->getOperand(0));
3094 break;
3095 }
3096 case ISD::INTRINSIC_WO_CHAIN: {
3097 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3098 break;
3099 }
3100 }
3101 }
3102
3103 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3104 TargetLowering::DAGCombinerInfo &DCI,
3105 const LoongArchSubtarget &Subtarget) {
3106 if (DCI.isBeforeLegalizeOps())
3107 return SDValue();
3108
3109 SDValue FirstOperand = N->getOperand(0);
3110 SDValue SecondOperand = N->getOperand(1);
3111 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3112 EVT ValTy = N->getValueType(0);
3113 SDLoc DL(N);
3114 uint64_t lsb, msb;
3115 unsigned SMIdx, SMLen;
3116 ConstantSDNode *CN;
3117 SDValue NewOperand;
3118 MVT GRLenVT = Subtarget.getGRLenVT();
3119
3120 // Op's second operand must be a shifted mask.
3121 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3122 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3123 return SDValue();
3124
3125 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3126 // Pattern match BSTRPICK.
3127 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3128 // => BSTRPICK $dst, $src, msb, lsb
3129 // where msb = lsb + len - 1
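// For example (illustrative only):
//   $dst = and (srl $src, 8), 0xff
// becomes BSTRPICK $dst, $src, 15, 8.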
3130
3131 // The second operand of the shift must be an immediate.
3132 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3133 return SDValue();
3134
3135 lsb = CN->getZExtValue();
3136
3137 // Return if the shifted mask does not start at bit 0 or the sum of its
3138 // length and lsb exceeds the word's size.
3139 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3140 return SDValue();
3141
3142 NewOperand = FirstOperand.getOperand(0);
3143 } else {
3144 // Pattern match BSTRPICK.
3145 // $dst = and $src, (2**len - 1), if len > 12
3146 // => BSTRPICK $dst, $src, msb, lsb
3147 // where lsb = 0 and msb = len - 1
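// For example (illustrative only):
//   $dst = and $src, 0xffff
// becomes BSTRPICK $dst, $src, 15, 0, since the mask is wider than 12 bits.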
3148
3149 // If the mask is <= 0xfff, andi can be used instead.
3150 if (CN->getZExtValue() <= 0xfff)
3151 return SDValue();
3152
3153 // Return if the mask's MSB exceeds the value's bit width.
3154 if (SMIdx + SMLen > ValTy.getSizeInBits())
3155 return SDValue();
3156
3157 if (SMIdx > 0) {
3158 // Omit if the constant has more than 2 uses. This is a conservative
3159 // decision. Whether it is a win depends on the HW microarchitecture.
3160 // However it should always be better for 1 and 2 uses.
3161 if (CN->use_size() > 2)
3162 return SDValue();
3163 // Return if the constant can be composed by a single LU12I.W.
3164 if ((CN->getZExtValue() & 0xfff) == 0)
3165 return SDValue();
3166 // Return if the constant can be composed by a single ADDI with
3167 // the zero register.
3168 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3169 return SDValue();
3170 }
3171
3172 lsb = SMIdx;
3173 NewOperand = FirstOperand;
3174 }
3175
3176 msb = lsb + SMLen - 1;
3177 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3178 DAG.getConstant(msb, DL, GRLenVT),
3179 DAG.getConstant(lsb, DL, GRLenVT));
3180 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3181 return NR0;
3182 // Try to optimize to
3183 // bstrpick $Rd, $Rs, msb, lsb
3184 // slli $Rd, $Rd, lsb
3185 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3186 DAG.getConstant(lsb, DL, GRLenVT));
3187 }
3188
3189 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3190 TargetLowering::DAGCombinerInfo &DCI,
3191 const LoongArchSubtarget &Subtarget) {
3192 if (DCI.isBeforeLegalizeOps())
3193 return SDValue();
3194
3195 // $dst = srl (and $src, Mask), Shamt
3196 // =>
3197 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3198 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
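// For example (illustrative only):
//   $dst = srl (and $src, 0xff00), 8
// has MaskIdx = 8, MaskLen = 8 and Shamt = 8, so it becomes
// BSTRPICK $dst, $src, 15, 8.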
3199 //
3200
3201 SDValue FirstOperand = N->getOperand(0);
3202 ConstantSDNode *CN;
3203 EVT ValTy = N->getValueType(0);
3204 SDLoc DL(N);
3205 MVT GRLenVT = Subtarget.getGRLenVT();
3206 unsigned MaskIdx, MaskLen;
3207 uint64_t Shamt;
3208
3209 // The first operand must be an AND and the second operand of the AND must be
3210 // a shifted mask.
3211 if (FirstOperand.getOpcode() != ISD::AND ||
3212 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3213 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3214 return SDValue();
3215
3216 // The second operand (shift amount) must be an immediate.
3217 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3218 return SDValue();
3219
3220 Shamt = CN->getZExtValue();
3221 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3222 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3223 FirstOperand->getOperand(0),
3224 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3225 DAG.getConstant(Shamt, DL, GRLenVT));
3226
3227 return SDValue();
3228 }
3229
3230 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3231 TargetLowering::DAGCombinerInfo &DCI,
3232 const LoongArchSubtarget &Subtarget) {
3233 MVT GRLenVT = Subtarget.getGRLenVT();
3234 EVT ValTy = N->getValueType(0);
3235 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3236 ConstantSDNode *CN0, *CN1;
3237 SDLoc DL(N);
3238 unsigned ValBits = ValTy.getSizeInBits();
3239 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3240 unsigned Shamt;
3241 bool SwapAndRetried = false;
3242
3243 if (DCI.isBeforeLegalizeOps())
3244 return SDValue();
3245
3246 if (ValBits != 32 && ValBits != 64)
3247 return SDValue();
3248
3249 Retry:
3250 // 1st pattern to match BSTRINS:
3251 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3252 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3253 // =>
3254 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
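// For example (illustrative only), with size = 8 and lsb = 8 on a 32-bit
// value:
//   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0x0000ff00)
// becomes BSTRINS X, Y, 15, 8.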
3255 if (N0.getOpcode() == ISD::AND &&
3256 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3257 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3258 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3259 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3260 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3261 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3262 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3263 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3264 (MaskIdx0 + MaskLen0 <= ValBits)) {
3265 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3266 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3267 N1.getOperand(0).getOperand(0),
3268 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3269 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3270 }
3271
3272 // 2nd pattern to match BSTRINS:
3273 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3274 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3275 // =>
3276 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3277 if (N0.getOpcode() == ISD::AND &&
3278 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3279 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3280 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3281 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3282 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3283 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3284 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3285 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3286 (MaskIdx0 + MaskLen0 <= ValBits)) {
3287 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3288 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3289 N1.getOperand(0).getOperand(0),
3290 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3291 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3292 }
3293
3294 // 3rd pattern to match BSTRINS:
3295 // R = or (and X, mask0), (and Y, mask1)
3296 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3297 // =>
3298 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3299 // where msb = lsb + size - 1
3300 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3301 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3302 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3303 (MaskIdx0 + MaskLen0 <= 64) &&
3304 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3305 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3306 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3307 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3308 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3309 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3310 DAG.getConstant(ValBits == 32
3311 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3312 : (MaskIdx0 + MaskLen0 - 1),
3313 DL, GRLenVT),
3314 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3315 }
3316
3317 // 4th pattern to match BSTRINS:
3318 // R = or (and X, mask), (shl Y, shamt)
3319 // where mask = (2**shamt - 1)
3320 // =>
3321 // R = BSTRINS X, Y, ValBits - 1, shamt
3322 // where ValBits = 32 or 64
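// For example (illustrative only), with shamt = 8 on a 32-bit value:
//   R = or (and X, 0xff), (shl Y, 8)
// becomes BSTRINS X, Y, 31, 8.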
3323 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3324 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3325 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3326 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3327 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3328 (MaskIdx0 + MaskLen0 <= ValBits)) {
3329 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3330 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3331 N1.getOperand(0),
3332 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3333 DAG.getConstant(Shamt, DL, GRLenVT));
3334 }
3335
3336 // 5th pattern to match BSTRINS:
3337 // R = or (and X, mask), const
3338 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3339 // =>
3340 // R = BSTRINS X, (const >> lsb), msb, lsb
3341 // where msb = lsb + size - 1
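// For example (illustrative only), on a 32-bit value:
//   R = or (and X, 0xffff00ff), 0x2300
// becomes BSTRINS X, 0x23, 15, 8.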
3342 if (N0.getOpcode() == ISD::AND &&
3343 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3344 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3345 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3346 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3347 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3348 return DAG.getNode(
3349 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3350 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3351 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3352 : (MaskIdx0 + MaskLen0 - 1),
3353 DL, GRLenVT),
3354 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3355 }
3356
3357 // 6th pattern.
3358 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3359 // by the incoming bits are known to be zero.
3360 // =>
3361 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3362 //
3363 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3364 // pattern is more common than the 1st. So we put the 1st before the 6th in
3365 // order to match as many nodes as possible.
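// For example (illustrative only), if bits 15:8 of b are known to be zero:
//   a = b | ((c & 0xff) << 8)
// becomes BSTRINS b, c, 15, 8.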
3366 ConstantSDNode *CNMask, *CNShamt;
3367 unsigned MaskIdx, MaskLen;
3368 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3369 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3370 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3371 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3372 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3373 Shamt = CNShamt->getZExtValue();
3374 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3375 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3376 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3377 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3378 N1.getOperand(0).getOperand(0),
3379 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3380 DAG.getConstant(Shamt, DL, GRLenVT));
3381 }
3382 }
3383
3384 // 7th pattern.
3385 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3386 // overwritten by the incoming bits are known to be zero.
3387 // =>
3388 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3389 //
3390 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3391 // before the 7th in order to match as many nodes as possible.
3392 if (N1.getOpcode() == ISD::AND &&
3393 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3394 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3395 N1.getOperand(0).getOpcode() == ISD::SHL &&
3396 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3397 CNShamt->getZExtValue() == MaskIdx) {
3398 APInt ShMask(ValBits, CNMask->getZExtValue());
3399 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3400 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3401 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3402 N1.getOperand(0).getOperand(0),
3403 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3404 DAG.getConstant(MaskIdx, DL, GRLenVT));
3405 }
3406 }
3407
3408 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3409 if (!SwapAndRetried) {
3410 std::swap(N0, N1);
3411 SwapAndRetried = true;
3412 goto Retry;
3413 }
3414
3415 SwapAndRetried = false;
3416 Retry2:
3417 // 8th pattern.
3418 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3419 // the incoming bits are known to be zero.
3420 // =>
3421 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3422 //
3423 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3424 // we put it here in order to match as many nodes as possible or generate fewer
3425 // instructions.
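// For example (illustrative only), if bits 23:16 of b are known to be zero:
//   a = b | (c & 0xff0000)
// becomes BSTRINS b, (c >> 16), 23, 16.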
3426 if (N1.getOpcode() == ISD::AND &&
3427 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3428 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3429 APInt ShMask(ValBits, CNMask->getZExtValue());
3430 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3431 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3432 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3433 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3434 N1->getOperand(0),
3435 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3436 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3437 DAG.getConstant(MaskIdx, DL, GRLenVT));
3438 }
3439 }
3440 // Swap N0/N1 and retry.
3441 if (!SwapAndRetried) {
3442 std::swap(N0, N1);
3443 SwapAndRetried = true;
3444 goto Retry2;
3445 }
3446
3447 return SDValue();
3448 }
3449
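// Check whether V is a narrow (i8/i16) value produced by a load or an
// AssertSext/AssertZext node, and report how it was extended via ExtType.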
3450 static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3451 ExtType = ISD::NON_EXTLOAD;
3452
3453 switch (V.getNode()->getOpcode()) {
3454 case ISD::LOAD: {
3455 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3456 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3457 (LoadNode->getMemoryVT() == MVT::i16)) {
3458 ExtType = LoadNode->getExtensionType();
3459 return true;
3460 }
3461 return false;
3462 }
3463 case ISD::AssertSext: {
3464 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3465 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3466 ExtType = ISD::SEXTLOAD;
3467 return true;
3468 }
3469 return false;
3470 }
3471 case ISD::AssertZext: {
3472 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3473 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3474 ExtType = ISD::ZEXTLOAD;
3475 return true;
3476 }
3477 return false;
3478 }
3479 default:
3480 return false;
3481 }
3482
3483 return false;
3484 }
3485
3486 // Eliminate redundant truncation and zero-extension nodes.
3487 // * Case 1:
3488 // +------------+ +------------+ +------------+
3489 // | Input1 | | Input2 | | CC |
3490 // +------------+ +------------+ +------------+
3491 // | | |
3492 // V V +----+
3493 // +------------+ +------------+ |
3494 // | TRUNCATE | | TRUNCATE | |
3495 // +------------+ +------------+ |
3496 // | | |
3497 // V V |
3498 // +------------+ +------------+ |
3499 // | ZERO_EXT | | ZERO_EXT | |
3500 // +------------+ +------------+ |
3501 // | | |
3502 // | +-------------+ |
3503 // V V | |
3504 // +----------------+ | |
3505 // | AND | | |
3506 // +----------------+ | |
3507 // | | |
3508 // +---------------+ | |
3509 // | | |
3510 // V V V
3511 // +-------------+
3512 // | CMP |
3513 // +-------------+
3514 // * Case 2:
3515 // +------------+ +------------+ +-------------+ +------------+ +------------+
3516 // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3517 // +------------+ +------------+ +-------------+ +------------+ +------------+
3518 // | | | | |
3519 // V | | | |
3520 // +------------+ | | | |
3521 // | XOR |<---------------------+ | |
3522 // +------------+ | | |
3523 // | | | |
3524 // V V +---------------+ |
3525 // +------------+ +------------+ | |
3526 // | TRUNCATE | | TRUNCATE | | +-------------------------+
3527 // +------------+ +------------+ | |
3528 // | | | |
3529 // V V | |
3530 // +------------+ +------------+ | |
3531 // | ZERO_EXT | | ZERO_EXT | | |
3532 // +------------+ +------------+ | |
3533 // | | | |
3534 // V V | |
3535 // +----------------+ | |
3536 // | AND | | |
3537 // +----------------+ | |
3538 // | | |
3539 // +---------------+ | |
3540 // | | |
3541 // V V V
3542 // +-------------+
3543 // | CMP |
3544 // +-------------+
3545 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3546 TargetLowering::DAGCombinerInfo &DCI,
3547 const LoongArchSubtarget &Subtarget) {
3548 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3549
3550 SDNode *AndNode = N->getOperand(0).getNode();
3551 if (AndNode->getOpcode() != ISD::AND)
3552 return SDValue();
3553
3554 SDValue AndInputValue2 = AndNode->getOperand(1);
3555 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3556 return SDValue();
3557
3558 SDValue CmpInputValue = N->getOperand(1);
3559 SDValue AndInputValue1 = AndNode->getOperand(0);
3560 if (AndInputValue1.getOpcode() == ISD::XOR) {
3561 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3562 return SDValue();
3563 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3564 if (!CN || CN->getSExtValue() != -1)
3565 return SDValue();
3566 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3567 if (!CN || CN->getSExtValue() != 0)
3568 return SDValue();
3569 AndInputValue1 = AndInputValue1.getOperand(0);
3570 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3571 return SDValue();
3572 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3573 if (AndInputValue2 != CmpInputValue)
3574 return SDValue();
3575 } else {
3576 return SDValue();
3577 }
3578
3579 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3580 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3581 return SDValue();
3582
3583 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3584 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3585 return SDValue();
3586
3587 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3588 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3589 ISD::LoadExtType ExtType1;
3590 ISD::LoadExtType ExtType2;
3591
3592 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3593 !checkValueWidth(TruncInputValue2, ExtType2))
3594 return SDValue();
3595
3596 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3597 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3598 return SDValue();
3599
3600 if ((ExtType2 != ISD::ZEXTLOAD) &&
3601 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3602 return SDValue();
3603
3604 // These truncation and zero-extension nodes are not necessary; remove them.
3605 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3606 TruncInputValue1, TruncInputValue2);
3607 SDValue NewSetCC =
3608 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3609 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3610 return SDValue(N, 0);
3611 }
3612
3613 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
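// Swapping the bytes of a word and then reversing all of its bits is
// equivalent to reversing the bits within each byte of the original value.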
3614 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3615 TargetLowering::DAGCombinerInfo &DCI,
3616 const LoongArchSubtarget &Subtarget) {
3617 if (DCI.isBeforeLegalizeOps())
3618 return SDValue();
3619
3620 SDValue Src = N->getOperand(0);
3621 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3622 return SDValue();
3623
3624 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3625 Src.getOperand(0));
3626 }
3627
3628 template <unsigned N>
3629 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3630 SelectionDAG &DAG,
3631 const LoongArchSubtarget &Subtarget,
3632 bool IsSigned = false) {
3633 SDLoc DL(Node);
3634 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3635 // Check the ImmArg.
3636 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3637 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3638 DAG.getContext()->emitError(Node->getOperationName(0) +
3639 ": argument out of range.");
3640 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3641 }
3642 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3643 }
3644
3645 template <unsigned N>
3646 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3647 SelectionDAG &DAG, bool IsSigned = false) {
3648 SDLoc DL(Node);
3649 EVT ResTy = Node->getValueType(0);
3650 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3651
3652 // Check the ImmArg.
3653 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3654 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3655 DAG.getContext()->emitError(Node->getOperationName(0) +
3656 ": argument out of range.");
3657 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3658 }
3659 return DAG.getConstant(
3660 APInt(ResTy.getScalarType().getSizeInBits(),
3661 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3662 DL, ResTy);
3663 }
3664
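// Mask each element of the shift-amount/bit-index vector down to the element
// bit width, i.e. Vec & (EltBits - 1), so the generic ISD nodes built from it
// only see in-range amounts.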
3665 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3666 SDLoc DL(Node);
3667 EVT ResTy = Node->getValueType(0);
3668 SDValue Vec = Node->getOperand(2);
3669 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3670 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3671 }
3672
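// Lower a [x]vbitclr intrinsic: clear the selected bit of every element, i.e.
// compute Op1 & ~(1 << (Op2 & (EltBits - 1))).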
3673 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3674 SDLoc DL(Node);
3675 EVT ResTy = Node->getValueType(0);
3676 SDValue One = DAG.getConstant(1, DL, ResTy);
3677 SDValue Bit =
3678 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3679
3680 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3681 DAG.getNOT(DL, Bit, ResTy));
3682 }
3683
3684 template <unsigned N>
3685 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3686 SDLoc DL(Node);
3687 EVT ResTy = Node->getValueType(0);
3688 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3689 // Check the unsigned ImmArg.
3690 if (!isUInt<N>(CImm->getZExtValue())) {
3691 DAG.getContext()->emitError(Node->getOperationName(0) +
3692 ": argument out of range.");
3693 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3694 }
3695
3696 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3697 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3698
3699 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3700 }
3701
3702 template <unsigned N>
3703 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3704 SDLoc DL(Node);
3705 EVT ResTy = Node->getValueType(0);
3706 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3707 // Check the unsigned ImmArg.
3708 if (!isUInt<N>(CImm->getZExtValue())) {
3709 DAG.getContext()->emitError(Node->getOperationName(0) +
3710 ": argument out of range.");
3711 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3712 }
3713
3714 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3715 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3716 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3717 }
3718
3719 template <unsigned N>
3720 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3721 SDLoc DL(Node);
3722 EVT ResTy = Node->getValueType(0);
3723 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3724 // Check the unsigned ImmArg.
3725 if (!isUInt<N>(CImm->getZExtValue())) {
3726 DAG.getContext()->emitError(Node->getOperationName(0) +
3727 ": argument out of range.");
3728 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3729 }
3730
3731 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3732 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3733 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3734 }
3735
3736 static SDValue
3737 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3738 TargetLowering::DAGCombinerInfo &DCI,
3739 const LoongArchSubtarget &Subtarget) {
3740 SDLoc DL(N);
3741 switch (N->getConstantOperandVal(0)) {
3742 default:
3743 break;
3744 case Intrinsic::loongarch_lsx_vadd_b:
3745 case Intrinsic::loongarch_lsx_vadd_h:
3746 case Intrinsic::loongarch_lsx_vadd_w:
3747 case Intrinsic::loongarch_lsx_vadd_d:
3748 case Intrinsic::loongarch_lasx_xvadd_b:
3749 case Intrinsic::loongarch_lasx_xvadd_h:
3750 case Intrinsic::loongarch_lasx_xvadd_w:
3751 case Intrinsic::loongarch_lasx_xvadd_d:
3752 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3753 N->getOperand(2));
3754 case Intrinsic::loongarch_lsx_vaddi_bu:
3755 case Intrinsic::loongarch_lsx_vaddi_hu:
3756 case Intrinsic::loongarch_lsx_vaddi_wu:
3757 case Intrinsic::loongarch_lsx_vaddi_du:
3758 case Intrinsic::loongarch_lasx_xvaddi_bu:
3759 case Intrinsic::loongarch_lasx_xvaddi_hu:
3760 case Intrinsic::loongarch_lasx_xvaddi_wu:
3761 case Intrinsic::loongarch_lasx_xvaddi_du:
3762 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3763 lowerVectorSplatImm<5>(N, 2, DAG));
3764 case Intrinsic::loongarch_lsx_vsub_b:
3765 case Intrinsic::loongarch_lsx_vsub_h:
3766 case Intrinsic::loongarch_lsx_vsub_w:
3767 case Intrinsic::loongarch_lsx_vsub_d:
3768 case Intrinsic::loongarch_lasx_xvsub_b:
3769 case Intrinsic::loongarch_lasx_xvsub_h:
3770 case Intrinsic::loongarch_lasx_xvsub_w:
3771 case Intrinsic::loongarch_lasx_xvsub_d:
3772 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3773 N->getOperand(2));
3774 case Intrinsic::loongarch_lsx_vsubi_bu:
3775 case Intrinsic::loongarch_lsx_vsubi_hu:
3776 case Intrinsic::loongarch_lsx_vsubi_wu:
3777 case Intrinsic::loongarch_lsx_vsubi_du:
3778 case Intrinsic::loongarch_lasx_xvsubi_bu:
3779 case Intrinsic::loongarch_lasx_xvsubi_hu:
3780 case Intrinsic::loongarch_lasx_xvsubi_wu:
3781 case Intrinsic::loongarch_lasx_xvsubi_du:
3782 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3783 lowerVectorSplatImm<5>(N, 2, DAG));
3784 case Intrinsic::loongarch_lsx_vneg_b:
3785 case Intrinsic::loongarch_lsx_vneg_h:
3786 case Intrinsic::loongarch_lsx_vneg_w:
3787 case Intrinsic::loongarch_lsx_vneg_d:
3788 case Intrinsic::loongarch_lasx_xvneg_b:
3789 case Intrinsic::loongarch_lasx_xvneg_h:
3790 case Intrinsic::loongarch_lasx_xvneg_w:
3791 case Intrinsic::loongarch_lasx_xvneg_d:
3792 return DAG.getNode(
3793 ISD::SUB, DL, N->getValueType(0),
3794 DAG.getConstant(
3795 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3796 /*isSigned=*/true),
3797 SDLoc(N), N->getValueType(0)),
3798 N->getOperand(1));
3799 case Intrinsic::loongarch_lsx_vmax_b:
3800 case Intrinsic::loongarch_lsx_vmax_h:
3801 case Intrinsic::loongarch_lsx_vmax_w:
3802 case Intrinsic::loongarch_lsx_vmax_d:
3803 case Intrinsic::loongarch_lasx_xvmax_b:
3804 case Intrinsic::loongarch_lasx_xvmax_h:
3805 case Intrinsic::loongarch_lasx_xvmax_w:
3806 case Intrinsic::loongarch_lasx_xvmax_d:
3807 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3808 N->getOperand(2));
3809 case Intrinsic::loongarch_lsx_vmax_bu:
3810 case Intrinsic::loongarch_lsx_vmax_hu:
3811 case Intrinsic::loongarch_lsx_vmax_wu:
3812 case Intrinsic::loongarch_lsx_vmax_du:
3813 case Intrinsic::loongarch_lasx_xvmax_bu:
3814 case Intrinsic::loongarch_lasx_xvmax_hu:
3815 case Intrinsic::loongarch_lasx_xvmax_wu:
3816 case Intrinsic::loongarch_lasx_xvmax_du:
3817 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3818 N->getOperand(2));
3819 case Intrinsic::loongarch_lsx_vmaxi_b:
3820 case Intrinsic::loongarch_lsx_vmaxi_h:
3821 case Intrinsic::loongarch_lsx_vmaxi_w:
3822 case Intrinsic::loongarch_lsx_vmaxi_d:
3823 case Intrinsic::loongarch_lasx_xvmaxi_b:
3824 case Intrinsic::loongarch_lasx_xvmaxi_h:
3825 case Intrinsic::loongarch_lasx_xvmaxi_w:
3826 case Intrinsic::loongarch_lasx_xvmaxi_d:
3827 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3828 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3829 case Intrinsic::loongarch_lsx_vmaxi_bu:
3830 case Intrinsic::loongarch_lsx_vmaxi_hu:
3831 case Intrinsic::loongarch_lsx_vmaxi_wu:
3832 case Intrinsic::loongarch_lsx_vmaxi_du:
3833 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3834 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3835 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3836 case Intrinsic::loongarch_lasx_xvmaxi_du:
3837 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3838 lowerVectorSplatImm<5>(N, 2, DAG));
3839 case Intrinsic::loongarch_lsx_vmin_b:
3840 case Intrinsic::loongarch_lsx_vmin_h:
3841 case Intrinsic::loongarch_lsx_vmin_w:
3842 case Intrinsic::loongarch_lsx_vmin_d:
3843 case Intrinsic::loongarch_lasx_xvmin_b:
3844 case Intrinsic::loongarch_lasx_xvmin_h:
3845 case Intrinsic::loongarch_lasx_xvmin_w:
3846 case Intrinsic::loongarch_lasx_xvmin_d:
3847 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3848 N->getOperand(2));
3849 case Intrinsic::loongarch_lsx_vmin_bu:
3850 case Intrinsic::loongarch_lsx_vmin_hu:
3851 case Intrinsic::loongarch_lsx_vmin_wu:
3852 case Intrinsic::loongarch_lsx_vmin_du:
3853 case Intrinsic::loongarch_lasx_xvmin_bu:
3854 case Intrinsic::loongarch_lasx_xvmin_hu:
3855 case Intrinsic::loongarch_lasx_xvmin_wu:
3856 case Intrinsic::loongarch_lasx_xvmin_du:
3857 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3858 N->getOperand(2));
3859 case Intrinsic::loongarch_lsx_vmini_b:
3860 case Intrinsic::loongarch_lsx_vmini_h:
3861 case Intrinsic::loongarch_lsx_vmini_w:
3862 case Intrinsic::loongarch_lsx_vmini_d:
3863 case Intrinsic::loongarch_lasx_xvmini_b:
3864 case Intrinsic::loongarch_lasx_xvmini_h:
3865 case Intrinsic::loongarch_lasx_xvmini_w:
3866 case Intrinsic::loongarch_lasx_xvmini_d:
3867 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3868 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3869 case Intrinsic::loongarch_lsx_vmini_bu:
3870 case Intrinsic::loongarch_lsx_vmini_hu:
3871 case Intrinsic::loongarch_lsx_vmini_wu:
3872 case Intrinsic::loongarch_lsx_vmini_du:
3873 case Intrinsic::loongarch_lasx_xvmini_bu:
3874 case Intrinsic::loongarch_lasx_xvmini_hu:
3875 case Intrinsic::loongarch_lasx_xvmini_wu:
3876 case Intrinsic::loongarch_lasx_xvmini_du:
3877 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3878 lowerVectorSplatImm<5>(N, 2, DAG));
3879 case Intrinsic::loongarch_lsx_vmul_b:
3880 case Intrinsic::loongarch_lsx_vmul_h:
3881 case Intrinsic::loongarch_lsx_vmul_w:
3882 case Intrinsic::loongarch_lsx_vmul_d:
3883 case Intrinsic::loongarch_lasx_xvmul_b:
3884 case Intrinsic::loongarch_lasx_xvmul_h:
3885 case Intrinsic::loongarch_lasx_xvmul_w:
3886 case Intrinsic::loongarch_lasx_xvmul_d:
3887 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3888 N->getOperand(2));
3889 case Intrinsic::loongarch_lsx_vmadd_b:
3890 case Intrinsic::loongarch_lsx_vmadd_h:
3891 case Intrinsic::loongarch_lsx_vmadd_w:
3892 case Intrinsic::loongarch_lsx_vmadd_d:
3893 case Intrinsic::loongarch_lasx_xvmadd_b:
3894 case Intrinsic::loongarch_lasx_xvmadd_h:
3895 case Intrinsic::loongarch_lasx_xvmadd_w:
3896 case Intrinsic::loongarch_lasx_xvmadd_d: {
3897 EVT ResTy = N->getValueType(0);
3898 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3899 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3900 N->getOperand(3)));
3901 }
3902 case Intrinsic::loongarch_lsx_vmsub_b:
3903 case Intrinsic::loongarch_lsx_vmsub_h:
3904 case Intrinsic::loongarch_lsx_vmsub_w:
3905 case Intrinsic::loongarch_lsx_vmsub_d:
3906 case Intrinsic::loongarch_lasx_xvmsub_b:
3907 case Intrinsic::loongarch_lasx_xvmsub_h:
3908 case Intrinsic::loongarch_lasx_xvmsub_w:
3909 case Intrinsic::loongarch_lasx_xvmsub_d: {
3910 EVT ResTy = N->getValueType(0);
3911 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
3912 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3913 N->getOperand(3)));
3914 }
3915 case Intrinsic::loongarch_lsx_vdiv_b:
3916 case Intrinsic::loongarch_lsx_vdiv_h:
3917 case Intrinsic::loongarch_lsx_vdiv_w:
3918 case Intrinsic::loongarch_lsx_vdiv_d:
3919 case Intrinsic::loongarch_lasx_xvdiv_b:
3920 case Intrinsic::loongarch_lasx_xvdiv_h:
3921 case Intrinsic::loongarch_lasx_xvdiv_w:
3922 case Intrinsic::loongarch_lasx_xvdiv_d:
3923 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
3924 N->getOperand(2));
3925 case Intrinsic::loongarch_lsx_vdiv_bu:
3926 case Intrinsic::loongarch_lsx_vdiv_hu:
3927 case Intrinsic::loongarch_lsx_vdiv_wu:
3928 case Intrinsic::loongarch_lsx_vdiv_du:
3929 case Intrinsic::loongarch_lasx_xvdiv_bu:
3930 case Intrinsic::loongarch_lasx_xvdiv_hu:
3931 case Intrinsic::loongarch_lasx_xvdiv_wu:
3932 case Intrinsic::loongarch_lasx_xvdiv_du:
3933 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
3934 N->getOperand(2));
3935 case Intrinsic::loongarch_lsx_vmod_b:
3936 case Intrinsic::loongarch_lsx_vmod_h:
3937 case Intrinsic::loongarch_lsx_vmod_w:
3938 case Intrinsic::loongarch_lsx_vmod_d:
3939 case Intrinsic::loongarch_lasx_xvmod_b:
3940 case Intrinsic::loongarch_lasx_xvmod_h:
3941 case Intrinsic::loongarch_lasx_xvmod_w:
3942 case Intrinsic::loongarch_lasx_xvmod_d:
3943 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
3944 N->getOperand(2));
3945 case Intrinsic::loongarch_lsx_vmod_bu:
3946 case Intrinsic::loongarch_lsx_vmod_hu:
3947 case Intrinsic::loongarch_lsx_vmod_wu:
3948 case Intrinsic::loongarch_lsx_vmod_du:
3949 case Intrinsic::loongarch_lasx_xvmod_bu:
3950 case Intrinsic::loongarch_lasx_xvmod_hu:
3951 case Intrinsic::loongarch_lasx_xvmod_wu:
3952 case Intrinsic::loongarch_lasx_xvmod_du:
3953 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
3954 N->getOperand(2));
3955 case Intrinsic::loongarch_lsx_vand_v:
3956 case Intrinsic::loongarch_lasx_xvand_v:
3957 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3958 N->getOperand(2));
3959 case Intrinsic::loongarch_lsx_vor_v:
3960 case Intrinsic::loongarch_lasx_xvor_v:
3961 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3962 N->getOperand(2));
3963 case Intrinsic::loongarch_lsx_vxor_v:
3964 case Intrinsic::loongarch_lasx_xvxor_v:
3965 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3966 N->getOperand(2));
3967 case Intrinsic::loongarch_lsx_vnor_v:
3968 case Intrinsic::loongarch_lasx_xvnor_v: {
3969 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3970 N->getOperand(2));
3971 return DAG.getNOT(DL, Res, Res->getValueType(0));
3972 }
3973 case Intrinsic::loongarch_lsx_vandi_b:
3974 case Intrinsic::loongarch_lasx_xvandi_b:
3975 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3976 lowerVectorSplatImm<8>(N, 2, DAG));
3977 case Intrinsic::loongarch_lsx_vori_b:
3978 case Intrinsic::loongarch_lasx_xvori_b:
3979 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3980 lowerVectorSplatImm<8>(N, 2, DAG));
3981 case Intrinsic::loongarch_lsx_vxori_b:
3982 case Intrinsic::loongarch_lasx_xvxori_b:
3983 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3984 lowerVectorSplatImm<8>(N, 2, DAG));
3985 case Intrinsic::loongarch_lsx_vsll_b:
3986 case Intrinsic::loongarch_lsx_vsll_h:
3987 case Intrinsic::loongarch_lsx_vsll_w:
3988 case Intrinsic::loongarch_lsx_vsll_d:
3989 case Intrinsic::loongarch_lasx_xvsll_b:
3990 case Intrinsic::loongarch_lasx_xvsll_h:
3991 case Intrinsic::loongarch_lasx_xvsll_w:
3992 case Intrinsic::loongarch_lasx_xvsll_d:
3993 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3994 truncateVecElts(N, DAG));
3995 case Intrinsic::loongarch_lsx_vslli_b:
3996 case Intrinsic::loongarch_lasx_xvslli_b:
3997 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3998 lowerVectorSplatImm<3>(N, 2, DAG));
3999 case Intrinsic::loongarch_lsx_vslli_h:
4000 case Intrinsic::loongarch_lasx_xvslli_h:
4001 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4002 lowerVectorSplatImm<4>(N, 2, DAG));
4003 case Intrinsic::loongarch_lsx_vslli_w:
4004 case Intrinsic::loongarch_lasx_xvslli_w:
4005 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4006 lowerVectorSplatImm<5>(N, 2, DAG));
4007 case Intrinsic::loongarch_lsx_vslli_d:
4008 case Intrinsic::loongarch_lasx_xvslli_d:
4009 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4010 lowerVectorSplatImm<6>(N, 2, DAG));
4011 case Intrinsic::loongarch_lsx_vsrl_b:
4012 case Intrinsic::loongarch_lsx_vsrl_h:
4013 case Intrinsic::loongarch_lsx_vsrl_w:
4014 case Intrinsic::loongarch_lsx_vsrl_d:
4015 case Intrinsic::loongarch_lasx_xvsrl_b:
4016 case Intrinsic::loongarch_lasx_xvsrl_h:
4017 case Intrinsic::loongarch_lasx_xvsrl_w:
4018 case Intrinsic::loongarch_lasx_xvsrl_d:
4019 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4020 truncateVecElts(N, DAG));
4021 case Intrinsic::loongarch_lsx_vsrli_b:
4022 case Intrinsic::loongarch_lasx_xvsrli_b:
4023 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4024 lowerVectorSplatImm<3>(N, 2, DAG));
4025 case Intrinsic::loongarch_lsx_vsrli_h:
4026 case Intrinsic::loongarch_lasx_xvsrli_h:
4027 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4028 lowerVectorSplatImm<4>(N, 2, DAG));
4029 case Intrinsic::loongarch_lsx_vsrli_w:
4030 case Intrinsic::loongarch_lasx_xvsrli_w:
4031 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4032 lowerVectorSplatImm<5>(N, 2, DAG));
4033 case Intrinsic::loongarch_lsx_vsrli_d:
4034 case Intrinsic::loongarch_lasx_xvsrli_d:
4035 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4036 lowerVectorSplatImm<6>(N, 2, DAG));
4037 case Intrinsic::loongarch_lsx_vsra_b:
4038 case Intrinsic::loongarch_lsx_vsra_h:
4039 case Intrinsic::loongarch_lsx_vsra_w:
4040 case Intrinsic::loongarch_lsx_vsra_d:
4041 case Intrinsic::loongarch_lasx_xvsra_b:
4042 case Intrinsic::loongarch_lasx_xvsra_h:
4043 case Intrinsic::loongarch_lasx_xvsra_w:
4044 case Intrinsic::loongarch_lasx_xvsra_d:
4045 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4046 truncateVecElts(N, DAG));
4047 case Intrinsic::loongarch_lsx_vsrai_b:
4048 case Intrinsic::loongarch_lasx_xvsrai_b:
4049 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4050 lowerVectorSplatImm<3>(N, 2, DAG));
4051 case Intrinsic::loongarch_lsx_vsrai_h:
4052 case Intrinsic::loongarch_lasx_xvsrai_h:
4053 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4054 lowerVectorSplatImm<4>(N, 2, DAG));
4055 case Intrinsic::loongarch_lsx_vsrai_w:
4056 case Intrinsic::loongarch_lasx_xvsrai_w:
4057 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4058 lowerVectorSplatImm<5>(N, 2, DAG));
4059 case Intrinsic::loongarch_lsx_vsrai_d:
4060 case Intrinsic::loongarch_lasx_xvsrai_d:
4061 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4062 lowerVectorSplatImm<6>(N, 2, DAG));
4063 case Intrinsic::loongarch_lsx_vclz_b:
4064 case Intrinsic::loongarch_lsx_vclz_h:
4065 case Intrinsic::loongarch_lsx_vclz_w:
4066 case Intrinsic::loongarch_lsx_vclz_d:
4067 case Intrinsic::loongarch_lasx_xvclz_b:
4068 case Intrinsic::loongarch_lasx_xvclz_h:
4069 case Intrinsic::loongarch_lasx_xvclz_w:
4070 case Intrinsic::loongarch_lasx_xvclz_d:
4071 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4072 case Intrinsic::loongarch_lsx_vpcnt_b:
4073 case Intrinsic::loongarch_lsx_vpcnt_h:
4074 case Intrinsic::loongarch_lsx_vpcnt_w:
4075 case Intrinsic::loongarch_lsx_vpcnt_d:
4076 case Intrinsic::loongarch_lasx_xvpcnt_b:
4077 case Intrinsic::loongarch_lasx_xvpcnt_h:
4078 case Intrinsic::loongarch_lasx_xvpcnt_w:
4079 case Intrinsic::loongarch_lasx_xvpcnt_d:
4080 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4081 case Intrinsic::loongarch_lsx_vbitclr_b:
4082 case Intrinsic::loongarch_lsx_vbitclr_h:
4083 case Intrinsic::loongarch_lsx_vbitclr_w:
4084 case Intrinsic::loongarch_lsx_vbitclr_d:
4085 case Intrinsic::loongarch_lasx_xvbitclr_b:
4086 case Intrinsic::loongarch_lasx_xvbitclr_h:
4087 case Intrinsic::loongarch_lasx_xvbitclr_w:
4088 case Intrinsic::loongarch_lasx_xvbitclr_d:
4089 return lowerVectorBitClear(N, DAG);
4090 case Intrinsic::loongarch_lsx_vbitclri_b:
4091 case Intrinsic::loongarch_lasx_xvbitclri_b:
4092 return lowerVectorBitClearImm<3>(N, DAG);
4093 case Intrinsic::loongarch_lsx_vbitclri_h:
4094 case Intrinsic::loongarch_lasx_xvbitclri_h:
4095 return lowerVectorBitClearImm<4>(N, DAG);
4096 case Intrinsic::loongarch_lsx_vbitclri_w:
4097 case Intrinsic::loongarch_lasx_xvbitclri_w:
4098 return lowerVectorBitClearImm<5>(N, DAG);
4099 case Intrinsic::loongarch_lsx_vbitclri_d:
4100 case Intrinsic::loongarch_lasx_xvbitclri_d:
4101 return lowerVectorBitClearImm<6>(N, DAG);
4102 case Intrinsic::loongarch_lsx_vbitset_b:
4103 case Intrinsic::loongarch_lsx_vbitset_h:
4104 case Intrinsic::loongarch_lsx_vbitset_w:
4105 case Intrinsic::loongarch_lsx_vbitset_d:
4106 case Intrinsic::loongarch_lasx_xvbitset_b:
4107 case Intrinsic::loongarch_lasx_xvbitset_h:
4108 case Intrinsic::loongarch_lasx_xvbitset_w:
4109 case Intrinsic::loongarch_lasx_xvbitset_d: {
4110 EVT VecTy = N->getValueType(0);
4111 SDValue One = DAG.getConstant(1, DL, VecTy);
4112 return DAG.getNode(
4113 ISD::OR, DL, VecTy, N->getOperand(1),
4114 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4115 }
4116 case Intrinsic::loongarch_lsx_vbitseti_b:
4117 case Intrinsic::loongarch_lasx_xvbitseti_b:
4118 return lowerVectorBitSetImm<3>(N, DAG);
4119 case Intrinsic::loongarch_lsx_vbitseti_h:
4120 case Intrinsic::loongarch_lasx_xvbitseti_h:
4121 return lowerVectorBitSetImm<4>(N, DAG);
4122 case Intrinsic::loongarch_lsx_vbitseti_w:
4123 case Intrinsic::loongarch_lasx_xvbitseti_w:
4124 return lowerVectorBitSetImm<5>(N, DAG);
4125 case Intrinsic::loongarch_lsx_vbitseti_d:
4126 case Intrinsic::loongarch_lasx_xvbitseti_d:
4127 return lowerVectorBitSetImm<6>(N, DAG);
4128 case Intrinsic::loongarch_lsx_vbitrev_b:
4129 case Intrinsic::loongarch_lsx_vbitrev_h:
4130 case Intrinsic::loongarch_lsx_vbitrev_w:
4131 case Intrinsic::loongarch_lsx_vbitrev_d:
4132 case Intrinsic::loongarch_lasx_xvbitrev_b:
4133 case Intrinsic::loongarch_lasx_xvbitrev_h:
4134 case Intrinsic::loongarch_lasx_xvbitrev_w:
4135 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4136 EVT VecTy = N->getValueType(0);
4137 SDValue One = DAG.getConstant(1, DL, VecTy);
4138 return DAG.getNode(
4139 ISD::XOR, DL, VecTy, N->getOperand(1),
4140 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4141 }
4142 case Intrinsic::loongarch_lsx_vbitrevi_b:
4143 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4144 return lowerVectorBitRevImm<3>(N, DAG);
4145 case Intrinsic::loongarch_lsx_vbitrevi_h:
4146 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4147 return lowerVectorBitRevImm<4>(N, DAG);
4148 case Intrinsic::loongarch_lsx_vbitrevi_w:
4149 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4150 return lowerVectorBitRevImm<5>(N, DAG);
4151 case Intrinsic::loongarch_lsx_vbitrevi_d:
4152 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4153 return lowerVectorBitRevImm<6>(N, DAG);
4154 case Intrinsic::loongarch_lsx_vfadd_s:
4155 case Intrinsic::loongarch_lsx_vfadd_d:
4156 case Intrinsic::loongarch_lasx_xvfadd_s:
4157 case Intrinsic::loongarch_lasx_xvfadd_d:
4158 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4159 N->getOperand(2));
4160 case Intrinsic::loongarch_lsx_vfsub_s:
4161 case Intrinsic::loongarch_lsx_vfsub_d:
4162 case Intrinsic::loongarch_lasx_xvfsub_s:
4163 case Intrinsic::loongarch_lasx_xvfsub_d:
4164 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4165 N->getOperand(2));
4166 case Intrinsic::loongarch_lsx_vfmul_s:
4167 case Intrinsic::loongarch_lsx_vfmul_d:
4168 case Intrinsic::loongarch_lasx_xvfmul_s:
4169 case Intrinsic::loongarch_lasx_xvfmul_d:
4170 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4171 N->getOperand(2));
4172 case Intrinsic::loongarch_lsx_vfdiv_s:
4173 case Intrinsic::loongarch_lsx_vfdiv_d:
4174 case Intrinsic::loongarch_lasx_xvfdiv_s:
4175 case Intrinsic::loongarch_lasx_xvfdiv_d:
4176 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4177 N->getOperand(2));
4178 case Intrinsic::loongarch_lsx_vfmadd_s:
4179 case Intrinsic::loongarch_lsx_vfmadd_d:
4180 case Intrinsic::loongarch_lasx_xvfmadd_s:
4181 case Intrinsic::loongarch_lasx_xvfmadd_d:
4182 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4183 N->getOperand(2), N->getOperand(3));
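// For [x]vinsgr2vr the lane index must fit in log2(#lanes) bits; the template
// argument of legalizeIntrinsicImmArg below gives that width for each element
// size.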
4184 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4185 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4186 N->getOperand(1), N->getOperand(2),
4187 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4188 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4189 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4190 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4191 N->getOperand(1), N->getOperand(2),
4192 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4193 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4194 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4195 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4196 N->getOperand(1), N->getOperand(2),
4197 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4198 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4199 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4200 N->getOperand(1), N->getOperand(2),
4201 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4202 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4203 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4204 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4205 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4206 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4207 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4208 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4209 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
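// [x]vreplgr2vr broadcasts the GPR operand into every lane of the result
// vector by building a BUILD_VECTOR with identical operands.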
4210 EVT ResTy = N->getValueType(0);
4211 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
4212 return DAG.getBuildVector(ResTy, DL, Ops);
4213 }
4214 case Intrinsic::loongarch_lsx_vreplve_b:
4215 case Intrinsic::loongarch_lsx_vreplve_h:
4216 case Intrinsic::loongarch_lsx_vreplve_w:
4217 case Intrinsic::loongarch_lsx_vreplve_d:
4218 case Intrinsic::loongarch_lasx_xvreplve_b:
4219 case Intrinsic::loongarch_lasx_xvreplve_h:
4220 case Intrinsic::loongarch_lasx_xvreplve_w:
4221 case Intrinsic::loongarch_lasx_xvreplve_d:
4222 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4223 N->getOperand(1),
4224 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4225 N->getOperand(2)));
4226 }
4227 return SDValue();
4228 }
4229
4230 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4231 DAGCombinerInfo &DCI) const {
4232 SelectionDAG &DAG = DCI.DAG;
4233 switch (N->getOpcode()) {
4234 default:
4235 break;
4236 case ISD::AND:
4237 return performANDCombine(N, DAG, DCI, Subtarget);
4238 case ISD::OR:
4239 return performORCombine(N, DAG, DCI, Subtarget);
4240 case ISD::SETCC:
4241 return performSETCCCombine(N, DAG, DCI, Subtarget);
4242 case ISD::SRL:
4243 return performSRLCombine(N, DAG, DCI, Subtarget);
4244 case LoongArchISD::BITREV_W:
4245 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4246 case ISD::INTRINSIC_WO_CHAIN:
4247 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4248 }
4249 return SDValue();
4250 }
4251
4252 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4253 MachineBasicBlock *MBB) {
4254 if (!ZeroDivCheck)
4255 return MBB;
4256
4257 // Build instructions:
4258 // MBB:
4259 // div(or mod) $dst, $dividend, $divisor
4260 // bnez $divisor, SinkMBB
4261 // BreakMBB:
4262 // break 7 // BRK_DIVZERO
4263 // SinkMBB:
4264 // fallthrough
4265 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4266 MachineFunction::iterator It = ++MBB->getIterator();
4267 MachineFunction *MF = MBB->getParent();
4268 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4269 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4270 MF->insert(It, BreakMBB);
4271 MF->insert(It, SinkMBB);
4272
4273 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4274 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4275 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4276
4277 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4278 DebugLoc DL = MI.getDebugLoc();
4279 MachineOperand &Divisor = MI.getOperand(2);
4280 Register DivisorReg = Divisor.getReg();
4281
4282 // MBB:
4283 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4284 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4285 .addMBB(SinkMBB);
4286 MBB->addSuccessor(BreakMBB);
4287 MBB->addSuccessor(SinkMBB);
4288
4289 // BreakMBB:
4290 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4291 // definition of BRK_DIVZERO.
4292 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4293 BreakMBB->addSuccessor(SinkMBB);
4294
4295 // Clear Divisor's kill flag.
4296 Divisor.setIsKill(false);
4297
4298 return SinkMBB;
4299 }
4300
4301 static MachineBasicBlock *
4302 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4303 const LoongArchSubtarget &Subtarget) {
4304 unsigned CondOpc;
4305 switch (MI.getOpcode()) {
4306 default:
4307 llvm_unreachable("Unexpected opcode");
4308 case LoongArch::PseudoVBZ:
4309 CondOpc = LoongArch::VSETEQZ_V;
4310 break;
4311 case LoongArch::PseudoVBZ_B:
4312 CondOpc = LoongArch::VSETANYEQZ_B;
4313 break;
4314 case LoongArch::PseudoVBZ_H:
4315 CondOpc = LoongArch::VSETANYEQZ_H;
4316 break;
4317 case LoongArch::PseudoVBZ_W:
4318 CondOpc = LoongArch::VSETANYEQZ_W;
4319 break;
4320 case LoongArch::PseudoVBZ_D:
4321 CondOpc = LoongArch::VSETANYEQZ_D;
4322 break;
4323 case LoongArch::PseudoVBNZ:
4324 CondOpc = LoongArch::VSETNEZ_V;
4325 break;
4326 case LoongArch::PseudoVBNZ_B:
4327 CondOpc = LoongArch::VSETALLNEZ_B;
4328 break;
4329 case LoongArch::PseudoVBNZ_H:
4330 CondOpc = LoongArch::VSETALLNEZ_H;
4331 break;
4332 case LoongArch::PseudoVBNZ_W:
4333 CondOpc = LoongArch::VSETALLNEZ_W;
4334 break;
4335 case LoongArch::PseudoVBNZ_D:
4336 CondOpc = LoongArch::VSETALLNEZ_D;
4337 break;
4338 case LoongArch::PseudoXVBZ:
4339 CondOpc = LoongArch::XVSETEQZ_V;
4340 break;
4341 case LoongArch::PseudoXVBZ_B:
4342 CondOpc = LoongArch::XVSETANYEQZ_B;
4343 break;
4344 case LoongArch::PseudoXVBZ_H:
4345 CondOpc = LoongArch::XVSETANYEQZ_H;
4346 break;
4347 case LoongArch::PseudoXVBZ_W:
4348 CondOpc = LoongArch::XVSETANYEQZ_W;
4349 break;
4350 case LoongArch::PseudoXVBZ_D:
4351 CondOpc = LoongArch::XVSETANYEQZ_D;
4352 break;
4353 case LoongArch::PseudoXVBNZ:
4354 CondOpc = LoongArch::XVSETNEZ_V;
4355 break;
4356 case LoongArch::PseudoXVBNZ_B:
4357 CondOpc = LoongArch::XVSETALLNEZ_B;
4358 break;
4359 case LoongArch::PseudoXVBNZ_H:
4360 CondOpc = LoongArch::XVSETALLNEZ_H;
4361 break;
4362 case LoongArch::PseudoXVBNZ_W:
4363 CondOpc = LoongArch::XVSETALLNEZ_W;
4364 break;
4365 case LoongArch::PseudoXVBNZ_D:
4366 CondOpc = LoongArch::XVSETALLNEZ_D;
4367 break;
4368 }
4369
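// Lower the pseudo into a diamond:
//   BB:      $fcc = <CondOpc> $vr ; bcnez $fcc, TrueBB
//   FalseBB: $rd1 = addi.w $zero, 0 ; b SinkBB
//   TrueBB:  $rd2 = addi.w $zero, 1
//   SinkBB:  $dst = phi [$rd1, FalseBB], [$rd2, TrueBB]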
4370 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4371 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4372 DebugLoc DL = MI.getDebugLoc();
4373 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4374 MachineFunction::iterator It = ++BB->getIterator();
4375
4376 MachineFunction *F = BB->getParent();
4377 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4378 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4379 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4380
4381 F->insert(It, FalseBB);
4382 F->insert(It, TrueBB);
4383 F->insert(It, SinkBB);
4384
4385 // Transfer the remainder of MBB and its successor edges to Sink.
4386 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4387 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4388
4389 // Insert the real instruction to BB.
4390 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4391 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4392
4393 // Insert branch.
4394 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4395 BB->addSuccessor(FalseBB);
4396 BB->addSuccessor(TrueBB);
4397
4398 // FalseBB.
4399 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4400 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4401 .addReg(LoongArch::R0)
4402 .addImm(0);
4403 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4404 FalseBB->addSuccessor(SinkBB);
4405
4406 // TrueBB.
4407 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4408 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4409 .addReg(LoongArch::R0)
4410 .addImm(1);
4411 TrueBB->addSuccessor(SinkBB);
4412
4413 // SinkBB: merge the results.
4414 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4415 MI.getOperand(0).getReg())
4416 .addReg(RD1)
4417 .addMBB(FalseBB)
4418 .addReg(RD2)
4419 .addMBB(TrueBB);
4420
4421 // The pseudo instruction is gone now.
4422 MI.eraseFromParent();
4423 return SinkBB;
4424 }
4425
4426 static MachineBasicBlock *
4427 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4428 const LoongArchSubtarget &Subtarget) {
4429 unsigned InsOp;
4430 unsigned HalfSize;
4431 switch (MI.getOpcode()) {
4432 default:
4433 llvm_unreachable("Unexpected opcode");
4434 case LoongArch::PseudoXVINSGR2VR_B:
4435 HalfSize = 16;
4436 InsOp = LoongArch::VINSGR2VR_B;
4437 break;
4438 case LoongArch::PseudoXVINSGR2VR_H:
4439 HalfSize = 8;
4440 InsOp = LoongArch::VINSGR2VR_H;
4441 break;
4442 }
4443 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4444 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4445 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4446 DebugLoc DL = MI.getDebugLoc();
4447 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4448 // XDst = vector_insert XSrc, Elt, Idx
4449 Register XDst = MI.getOperand(0).getReg();
4450 Register XSrc = MI.getOperand(1).getReg();
4451 Register Elt = MI.getOperand(2).getReg();
4452 unsigned Idx = MI.getOperand(3).getImm();
4453
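// There is no 256-bit xvinsgr2vr.{b,h}, so emulate the insert with the 128-bit
// LSX instruction: if Idx targets the high half, bring that half down with
// xvpermi.q, insert into the 128-bit subregister, then merge the updated half
// back into the original vector.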
4454 Register ScratchReg1 = XSrc;
4455 if (Idx >= HalfSize) {
4456 ScratchReg1 = MRI.createVirtualRegister(RC);
4457 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4458 .addReg(XSrc)
4459 .addReg(XSrc)
4460 .addImm(1);
4461 }
4462
4463 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4464 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4465 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4466 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4467 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4468 .addReg(ScratchSubReg1)
4469 .addReg(Elt)
4470 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4471
4472 Register ScratchReg2 = XDst;
4473 if (Idx >= HalfSize)
4474 ScratchReg2 = MRI.createVirtualRegister(RC);
4475
4476 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4477 .addImm(0)
4478 .addReg(ScratchSubReg2)
4479 .addImm(LoongArch::sub_128);
4480
4481 if (Idx >= HalfSize)
4482 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4483 .addReg(XSrc)
4484 .addReg(ScratchReg2)
4485 .addImm(2);
4486
4487 MI.eraseFromParent();
4488 return BB;
4489 }
4490
4491 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4492 MachineInstr &MI, MachineBasicBlock *BB) const {
4493 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4494 DebugLoc DL = MI.getDebugLoc();
4495
4496 switch (MI.getOpcode()) {
4497 default:
4498 llvm_unreachable("Unexpected instr type to insert");
4499 case LoongArch::DIV_W:
4500 case LoongArch::DIV_WU:
4501 case LoongArch::MOD_W:
4502 case LoongArch::MOD_WU:
4503 case LoongArch::DIV_D:
4504 case LoongArch::DIV_DU:
4505 case LoongArch::MOD_D:
4506 case LoongArch::MOD_DU:
4507 return insertDivByZeroTrap(MI, BB);
4508 break;
4509 case LoongArch::WRFCSR: {
4510 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4511 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4512 .addReg(MI.getOperand(1).getReg());
4513 MI.eraseFromParent();
4514 return BB;
4515 }
4516 case LoongArch::RDFCSR: {
4517 MachineInstr *ReadFCSR =
4518 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4519 MI.getOperand(0).getReg())
4520 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4521 ReadFCSR->getOperand(1).setIsUndef();
4522 MI.eraseFromParent();
4523 return BB;
4524 }
4525 case LoongArch::PseudoVBZ:
4526 case LoongArch::PseudoVBZ_B:
4527 case LoongArch::PseudoVBZ_H:
4528 case LoongArch::PseudoVBZ_W:
4529 case LoongArch::PseudoVBZ_D:
4530 case LoongArch::PseudoVBNZ:
4531 case LoongArch::PseudoVBNZ_B:
4532 case LoongArch::PseudoVBNZ_H:
4533 case LoongArch::PseudoVBNZ_W:
4534 case LoongArch::PseudoVBNZ_D:
4535 case LoongArch::PseudoXVBZ:
4536 case LoongArch::PseudoXVBZ_B:
4537 case LoongArch::PseudoXVBZ_H:
4538 case LoongArch::PseudoXVBZ_W:
4539 case LoongArch::PseudoXVBZ_D:
4540 case LoongArch::PseudoXVBNZ:
4541 case LoongArch::PseudoXVBNZ_B:
4542 case LoongArch::PseudoXVBNZ_H:
4543 case LoongArch::PseudoXVBNZ_W:
4544 case LoongArch::PseudoXVBNZ_D:
4545 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4546 case LoongArch::PseudoXVINSGR2VR_B:
4547 case LoongArch::PseudoXVINSGR2VR_H:
4548 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4549 }
4550 }
4551
4552 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4553 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4554 unsigned *Fast) const {
4555 if (!Subtarget.hasUAL())
4556 return false;
4557
4558 // TODO: set reasonable speed number.
4559 if (Fast)
4560 *Fast = 1;
4561 return true;
4562 }
4563
4564 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4565 switch ((LoongArchISD::NodeType)Opcode) {
4566 case LoongArchISD::FIRST_NUMBER:
4567 break;
4568
4569 #define NODE_NAME_CASE(node) \
4570 case LoongArchISD::node: \
4571 return "LoongArchISD::" #node;
4572
4573 // TODO: Add more target-dependent nodes later.
4574 NODE_NAME_CASE(CALL)
4575 NODE_NAME_CASE(CALL_MEDIUM)
4576 NODE_NAME_CASE(CALL_LARGE)
4577 NODE_NAME_CASE(RET)
4578 NODE_NAME_CASE(TAIL)
4579 NODE_NAME_CASE(TAIL_MEDIUM)
4580 NODE_NAME_CASE(TAIL_LARGE)
4581 NODE_NAME_CASE(SLL_W)
4582 NODE_NAME_CASE(SRA_W)
4583 NODE_NAME_CASE(SRL_W)
4584 NODE_NAME_CASE(BSTRINS)
4585 NODE_NAME_CASE(BSTRPICK)
4586 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4587 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4588 NODE_NAME_CASE(FTINT)
4589 NODE_NAME_CASE(REVB_2H)
4590 NODE_NAME_CASE(REVB_2W)
4591 NODE_NAME_CASE(BITREV_4B)
4592 NODE_NAME_CASE(BITREV_W)
4593 NODE_NAME_CASE(ROTR_W)
4594 NODE_NAME_CASE(ROTL_W)
4595 NODE_NAME_CASE(DIV_WU)
4596 NODE_NAME_CASE(MOD_WU)
4597 NODE_NAME_CASE(CLZ_W)
4598 NODE_NAME_CASE(CTZ_W)
4599 NODE_NAME_CASE(DBAR)
4600 NODE_NAME_CASE(IBAR)
4601 NODE_NAME_CASE(BREAK)
4602 NODE_NAME_CASE(SYSCALL)
4603 NODE_NAME_CASE(CRC_W_B_W)
4604 NODE_NAME_CASE(CRC_W_H_W)
4605 NODE_NAME_CASE(CRC_W_W_W)
4606 NODE_NAME_CASE(CRC_W_D_W)
4607 NODE_NAME_CASE(CRCC_W_B_W)
4608 NODE_NAME_CASE(CRCC_W_H_W)
4609 NODE_NAME_CASE(CRCC_W_W_W)
4610 NODE_NAME_CASE(CRCC_W_D_W)
4611 NODE_NAME_CASE(CSRRD)
4612 NODE_NAME_CASE(CSRWR)
4613 NODE_NAME_CASE(CSRXCHG)
4614 NODE_NAME_CASE(IOCSRRD_B)
4615 NODE_NAME_CASE(IOCSRRD_H)
4616 NODE_NAME_CASE(IOCSRRD_W)
4617 NODE_NAME_CASE(IOCSRRD_D)
4618 NODE_NAME_CASE(IOCSRWR_B)
4619 NODE_NAME_CASE(IOCSRWR_H)
4620 NODE_NAME_CASE(IOCSRWR_W)
4621 NODE_NAME_CASE(IOCSRWR_D)
4622 NODE_NAME_CASE(CPUCFG)
4623 NODE_NAME_CASE(MOVGR2FCSR)
4624 NODE_NAME_CASE(MOVFCSR2GR)
4625 NODE_NAME_CASE(CACOP_D)
4626 NODE_NAME_CASE(CACOP_W)
4627 NODE_NAME_CASE(VSHUF)
4628 NODE_NAME_CASE(VPICKEV)
4629 NODE_NAME_CASE(VPICKOD)
4630 NODE_NAME_CASE(VPACKEV)
4631 NODE_NAME_CASE(VPACKOD)
4632 NODE_NAME_CASE(VILVL)
4633 NODE_NAME_CASE(VILVH)
4634 NODE_NAME_CASE(VSHUF4I)
4635 NODE_NAME_CASE(VREPLVEI)
4636 NODE_NAME_CASE(XVPERMI)
4637 NODE_NAME_CASE(VPICK_SEXT_ELT)
4638 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4639 NODE_NAME_CASE(VREPLVE)
4640 NODE_NAME_CASE(VALL_ZERO)
4641 NODE_NAME_CASE(VANY_ZERO)
4642 NODE_NAME_CASE(VALL_NONZERO)
4643 NODE_NAME_CASE(VANY_NONZERO)
4644 }
4645 #undef NODE_NAME_CASE
4646 return nullptr;
4647 }
4648
4649 //===----------------------------------------------------------------------===//
4650 // Calling Convention Implementation
4651 //===----------------------------------------------------------------------===//
4652
4653 // Eight general-purpose registers a0-a7 are used for passing integer
4654 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
4655 // to pass fixed-point arguments, and floating-point arguments when no FPR is
4656 // available or when using the soft-float ABI.
4657 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4658 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4659 LoongArch::R10, LoongArch::R11};
4660 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4661 // arguments, and fa0-fa1 are also used to return values.
4662 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4663 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4664 LoongArch::F6, LoongArch::F7};
4665 // FPR32 and FPR64 alias each other.
4666 const MCPhysReg ArgFPR64s[] = {
4667 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4668 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4669
4670 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4671 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4672 LoongArch::VR6, LoongArch::VR7};
4673
4674 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4675 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4676 LoongArch::XR6, LoongArch::XR7};
4677
4678 // Pass a 2*GRLen argument that has been split into two GRLen values through
4679 // registers or the stack as necessary.
4680 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4681 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4682 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4683 ISD::ArgFlagsTy ArgFlags2) {
4684 unsigned GRLenInBytes = GRLen / 8;
4685 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4686 // At least one half can be passed via register.
4687 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4688 VA1.getLocVT(), CCValAssign::Full));
4689 } else {
4690 // Both halves must be passed on the stack, with proper alignment.
4691 Align StackAlign =
4692 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4693 State.addLoc(
4694 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4695 State.AllocateStack(GRLenInBytes, StackAlign),
4696 VA1.getLocVT(), CCValAssign::Full));
4697 State.addLoc(CCValAssign::getMem(
4698 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4699 LocVT2, CCValAssign::Full));
4700 return false;
4701 }
4702 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4703 // The second half can also be passed via register.
4704 State.addLoc(
4705 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4706 } else {
4707 // The second half is passed via the stack, without additional alignment.
4708 State.addLoc(CCValAssign::getMem(
4709 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4710 LocVT2, CCValAssign::Full));
4711 }
4712 return false;
4713 }
4714
4715 // Implements the LoongArch calling convention. Returns true upon failure.
4716 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4717 unsigned ValNo, MVT ValVT,
4718 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4719 CCState &State, bool IsFixed, bool IsRet,
4720 Type *OrigTy) {
4721 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4722 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4723 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4724 MVT LocVT = ValVT;
4725
4726 // Any return value split into more than two values can't be returned
4727 // directly.
4728 if (IsRet && ValNo > 1)
4729 return true;
4730
4731 // Use a GPR if passing a variadic argument, or if no FPR is available.
4732 bool UseGPRForFloat = true;
4733
4734 switch (ABI) {
4735 default:
4736 llvm_unreachable("Unexpected ABI");
4737 break;
4738 case LoongArchABI::ABI_ILP32F:
4739 case LoongArchABI::ABI_LP64F:
4740 case LoongArchABI::ABI_ILP32D:
4741 case LoongArchABI::ABI_LP64D:
4742 UseGPRForFloat = !IsFixed;
4743 break;
4744 case LoongArchABI::ABI_ILP32S:
4745 case LoongArchABI::ABI_LP64S:
4746 break;
4747 }
4748
4749 // FPR32 and FPR64 alias each other.
4750 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4751 UseGPRForFloat = true;
4752
4753 if (UseGPRForFloat && ValVT == MVT::f32) {
4754 LocVT = GRLenVT;
4755 LocInfo = CCValAssign::BCvt;
4756 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4757 LocVT = MVT::i64;
4758 LocInfo = CCValAssign::BCvt;
4759 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4760 // TODO: Handle passing f64 on LA32 with D feature.
4761 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4762 }
4763
4764 // If this is a variadic argument, the LoongArch calling convention requires
4765 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4766 // byte alignment. An aligned register should be used regardless of whether
4767 // the original argument was split during legalisation or not. The argument
4768 // will not be passed by registers if the original type is larger than
4769 // 2*GRLen, so the register alignment rule does not apply.
4770 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4771 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4772 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4773 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4774 // Skip 'odd' register if necessary.
4775 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4776 State.AllocateReg(ArgGPRs);
4777 }
4778
4779 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4780 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4781 State.getPendingArgFlags();
4782
4783 assert(PendingLocs.size() == PendingArgFlags.size() &&
4784 "PendingLocs and PendingArgFlags out of sync");
4785
4786 // Split arguments might be passed indirectly, so keep track of the pending
4787 // values.
4788 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4789 LocVT = GRLenVT;
4790 LocInfo = CCValAssign::Indirect;
4791 PendingLocs.push_back(
4792 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4793 PendingArgFlags.push_back(ArgFlags);
4794 if (!ArgFlags.isSplitEnd()) {
4795 return false;
4796 }
4797 }
4798
4799 // If the split argument only had two elements, it should be passed directly
4800 // in registers or on the stack.
4801 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4802 PendingLocs.size() <= 2) {
4803 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4804 // Apply the normal calling convention rules to the first half of the
4805 // split argument.
4806 CCValAssign VA = PendingLocs[0];
4807 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4808 PendingLocs.clear();
4809 PendingArgFlags.clear();
4810 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4811 ArgFlags);
4812 }
4813
4814 // Allocate to a register if possible, or else a stack slot.
4815 Register Reg;
4816 unsigned StoreSizeBytes = GRLen / 8;
4817 Align StackAlign = Align(GRLen / 8);
4818
4819 if (ValVT == MVT::f32 && !UseGPRForFloat)
4820 Reg = State.AllocateReg(ArgFPR32s);
4821 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4822 Reg = State.AllocateReg(ArgFPR64s);
4823 else if (ValVT.is128BitVector())
4824 Reg = State.AllocateReg(ArgVRs);
4825 else if (ValVT.is256BitVector())
4826 Reg = State.AllocateReg(ArgXRs);
4827 else
4828 Reg = State.AllocateReg(ArgGPRs);
4829
4830 unsigned StackOffset =
4831 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4832
4833 // If we reach this point and PendingLocs is non-empty, we must be at the
4834 // end of a split argument that must be passed indirectly.
4835 if (!PendingLocs.empty()) {
4836 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4837 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4838 for (auto &It : PendingLocs) {
4839 if (Reg)
4840 It.convertToReg(Reg);
4841 else
4842 It.convertToMem(StackOffset);
4843 State.addLoc(It);
4844 }
4845 PendingLocs.clear();
4846 PendingArgFlags.clear();
4847 return false;
4848 }
4849 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
4850 "Expected an GRLenVT at this stage");
4851
4852 if (Reg) {
4853 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4854 return false;
4855 }
4856
4857 // When a floating-point value is passed on the stack, no bit-cast is needed.
4858 if (ValVT.isFloatingPoint()) {
4859 LocVT = ValVT;
4860 LocInfo = CCValAssign::Full;
4861 }
4862
4863 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4864 return false;
4865 }
4866
4867 void LoongArchTargetLowering::analyzeInputArgs(
4868 MachineFunction &MF, CCState &CCInfo,
4869 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
4870 LoongArchCCAssignFn Fn) const {
4871 FunctionType *FType = MF.getFunction().getFunctionType();
4872 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4873 MVT ArgVT = Ins[i].VT;
4874 Type *ArgTy = nullptr;
4875 if (IsRet)
4876 ArgTy = FType->getReturnType();
4877 else if (Ins[i].isOrigArg())
4878 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4879 LoongArchABI::ABI ABI =
4880 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4881 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
4882 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
4883 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
4884 << '\n');
4885 llvm_unreachable("");
4886 }
4887 }
4888 }
4889
4890 void LoongArchTargetLowering::analyzeOutputArgs(
4891 MachineFunction &MF, CCState &CCInfo,
4892 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4893 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
4894 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4895 MVT ArgVT = Outs[i].VT;
4896 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4897 LoongArchABI::ABI ABI =
4898 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4899 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
4900 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
4901 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
4902 << "\n");
4903 llvm_unreachable("");
4904 }
4905 }
4906 }
4907
4908 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
4909 // values.
4910 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
4911 const CCValAssign &VA, const SDLoc &DL) {
4912 switch (VA.getLocInfo()) {
4913 default:
4914 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4915 case CCValAssign::Full:
4916 case CCValAssign::Indirect:
4917 break;
4918 case CCValAssign::BCvt:
4919 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4920 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
4921 else
4922 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4923 break;
4924 }
4925 return Val;
4926 }
4927
4928 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
4929 const CCValAssign &VA, const SDLoc &DL,
4930 const ISD::InputArg &In,
4931 const LoongArchTargetLowering &TLI) {
4932 MachineFunction &MF = DAG.getMachineFunction();
4933 MachineRegisterInfo &RegInfo = MF.getRegInfo();
4934 EVT LocVT = VA.getLocVT();
4935 SDValue Val;
4936 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
4937 Register VReg = RegInfo.createVirtualRegister(RC);
4938 RegInfo.addLiveIn(VA.getLocReg(), VReg);
4939 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
4940
4941 // If input is sign extended from 32 bits, note it for the OptW pass.
4942 if (In.isOrigArg()) {
4943 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
4944 if (OrigArg->getType()->isIntegerTy()) {
4945 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
4946 // An input zero extended from i31 can also be considered sign extended.
4947 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
4948 (BitWidth < 32 && In.Flags.isZExt())) {
4949 LoongArchMachineFunctionInfo *LAFI =
4950 MF.getInfo<LoongArchMachineFunctionInfo>();
4951 LAFI->addSExt32Register(VReg);
4952 }
4953 }
4954 }
4955
4956 return convertLocVTToValVT(DAG, Val, VA, DL);
4957 }
4958
4959 // The caller is responsible for loading the full value if the argument is
4960 // passed with CCValAssign::Indirect.
4961 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
4962 const CCValAssign &VA, const SDLoc &DL) {
4963 MachineFunction &MF = DAG.getMachineFunction();
4964 MachineFrameInfo &MFI = MF.getFrameInfo();
4965 EVT ValVT = VA.getValVT();
4966 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
4967 /*IsImmutable=*/true);
4968 SDValue FIN = DAG.getFrameIndex(
4969 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
4970
4971 ISD::LoadExtType ExtType;
4972 switch (VA.getLocInfo()) {
4973 default:
4974 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4975 case CCValAssign::Full:
4976 case CCValAssign::Indirect:
4977 case CCValAssign::BCvt:
4978 ExtType = ISD::NON_EXTLOAD;
4979 break;
4980 }
4981 return DAG.getExtLoad(
4982 ExtType, DL, VA.getLocVT(), Chain, FIN,
4983 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
4984 }
4985
4986 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
4987 const CCValAssign &VA, const SDLoc &DL) {
4988 EVT LocVT = VA.getLocVT();
4989
4990 switch (VA.getLocInfo()) {
4991 default:
4992 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4993 case CCValAssign::Full:
4994 break;
4995 case CCValAssign::BCvt:
4996 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4997 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
4998 else
4999 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5000 break;
5001 }
5002 return Val;
5003 }
5004
5005 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5006 CCValAssign::LocInfo LocInfo,
5007 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5008 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5009 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5010 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5011 static const MCPhysReg GPRList[] = {
5012 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5013 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5014 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5015 if (unsigned Reg = State.AllocateReg(GPRList)) {
5016 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5017 return false;
5018 }
5019 }
5020
5021 if (LocVT == MVT::f32) {
5022 // Pass in STG registers: F1, F2, F3, F4
5023 // fs0,fs1,fs2,fs3
5024 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5025 LoongArch::F26, LoongArch::F27};
5026 if (unsigned Reg = State.AllocateReg(FPR32List)) {
5027 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5028 return false;
5029 }
5030 }
5031
5032 if (LocVT == MVT::f64) {
5033 // Pass in STG registers: D1, D2, D3, D4
5034 // fs4,fs5,fs6,fs7
5035 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5036 LoongArch::F30_64, LoongArch::F31_64};
5037 if (unsigned Reg = State.AllocateReg(FPR64List)) {
5038 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5039 return false;
5040 }
5041 }
5042
5043 report_fatal_error("No registers left in GHC calling convention");
5044 return true;
5045 }
5046
5047 // Transform physical registers into virtual registers.
5048 SDValue LoongArchTargetLowering::LowerFormalArguments(
5049 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5050 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5051 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5052
5053 MachineFunction &MF = DAG.getMachineFunction();
5054
5055 switch (CallConv) {
5056 default:
5057 llvm_unreachable("Unsupported calling convention");
5058 case CallingConv::C:
5059 case CallingConv::Fast:
5060 break;
5061 case CallingConv::GHC:
5062 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5063 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5064 report_fatal_error(
5065 "GHC calling convention requires the F and D extensions");
5066 }
5067
5068 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5069 MVT GRLenVT = Subtarget.getGRLenVT();
5070 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5071 // Used with varargs to accumulate store chains.
5072 std::vector<SDValue> OutChains;
5073
5074 // Assign locations to all of the incoming arguments.
5075 SmallVector<CCValAssign> ArgLocs;
5076 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5077
5078 if (CallConv == CallingConv::GHC)
5079 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5080 else
5081 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5082
5083 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5084 CCValAssign &VA = ArgLocs[i];
5085 SDValue ArgValue;
5086 if (VA.isRegLoc())
5087 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5088 else
5089 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5090 if (VA.getLocInfo() == CCValAssign::Indirect) {
5091 // If the original argument was split and passed by reference, we need to
5092 // load all parts of it here (using the same address).
5093 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5094 MachinePointerInfo()));
5095 unsigned ArgIndex = Ins[i].OrigArgIndex;
5096 unsigned ArgPartOffset = Ins[i].PartOffset;
5097 assert(ArgPartOffset == 0);
5098 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5099 CCValAssign &PartVA = ArgLocs[i + 1];
5100 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5101 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5102 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5103 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5104 MachinePointerInfo()));
5105 ++i;
5106 }
5107 continue;
5108 }
5109 InVals.push_back(ArgValue);
5110 }
5111
5112 if (IsVarArg) {
5113 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5114 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5115 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5116 MachineFrameInfo &MFI = MF.getFrameInfo();
5117 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5118 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5119
5120 // Offset of the first variable argument from stack pointer, and size of
5121 // the vararg save area. For now, the varargs save area is either zero or
5122 // large enough to hold a0-a7.
5123 int VaArgOffset, VarArgsSaveSize;
5124
5125 // If all registers are allocated, then all varargs must be passed on the
5126 // stack and we don't need to save any argregs.
5127 if (ArgRegs.size() == Idx) {
5128 VaArgOffset = CCInfo.getStackSize();
5129 VarArgsSaveSize = 0;
5130 } else {
5131 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5132 VaArgOffset = -VarArgsSaveSize;
5133 }
5134
5135 // Record the frame index of the first variable argument,
5136 // which is needed by VASTART.
5137 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5138 LoongArchFI->setVarArgsFrameIndex(FI);
5139
5140 // If saving an odd number of registers, create an extra stack slot to
5141 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5142 // offsets to even-numbered registers remain 2*GRLen-aligned.
5143 if (Idx % 2) {
5144 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5145 true);
5146 VarArgsSaveSize += GRLenInBytes;
5147 }
5148
5149 // Copy the integer registers that may have been used for passing varargs
5150 // to the vararg save area.
5151 for (unsigned I = Idx; I < ArgRegs.size();
5152 ++I, VaArgOffset += GRLenInBytes) {
5153 const Register Reg = RegInfo.createVirtualRegister(RC);
5154 RegInfo.addLiveIn(ArgRegs[I], Reg);
5155 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5156 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5157 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5158 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5159 MachinePointerInfo::getFixedStack(MF, FI));
5160 cast<StoreSDNode>(Store.getNode())
5161 ->getMemOperand()
5162 ->setValue((Value *)nullptr);
5163 OutChains.push_back(Store);
5164 }
5165 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5166 }
5167
5168 // All stores are grouped in one node to allow the matching between
5169 // the size of Ins and InVals. This only happens for vararg functions.
5170 if (!OutChains.empty()) {
5171 OutChains.push_back(Chain);
5172 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5173 }
5174
5175 return Chain;
5176 }
5177
5178 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5179 return CI->isTailCall();
5180 }
5181
5182 // Check if the return value is used only as a return value, as otherwise
5183 // we can't perform a tail-call.
5184 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5185 SDValue &Chain) const {
5186 if (N->getNumValues() != 1)
5187 return false;
5188 if (!N->hasNUsesOfValue(1, 0))
5189 return false;
5190
5191 SDNode *Copy = *N->use_begin();
5192 if (Copy->getOpcode() != ISD::CopyToReg)
5193 return false;
5194
5195 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5196 // isn't safe to perform a tail call.
5197 if (Copy->getGluedNode())
5198 return false;
5199
5200 // The copy must be used by a LoongArchISD::RET, and nothing else.
5201 bool HasRet = false;
5202 for (SDNode *Node : Copy->uses()) {
5203 if (Node->getOpcode() != LoongArchISD::RET)
5204 return false;
5205 HasRet = true;
5206 }
5207
5208 if (!HasRet)
5209 return false;
5210
5211 Chain = Copy->getOperand(0);
5212 return true;
5213 }
5214
5215 // Check whether the call is eligible for tail call optimization.
5216 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5217 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5218 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5219
5220 auto CalleeCC = CLI.CallConv;
5221 auto &Outs = CLI.Outs;
5222 auto &Caller = MF.getFunction();
5223 auto CallerCC = Caller.getCallingConv();
5224
5225 // Do not tail call opt if the stack is used to pass parameters.
5226 if (CCInfo.getStackSize() != 0)
5227 return false;
5228
5229 // Do not tail call opt if any parameters need to be passed indirectly.
5230 for (auto &VA : ArgLocs)
5231 if (VA.getLocInfo() == CCValAssign::Indirect)
5232 return false;
5233
5234 // Do not tail call opt if either caller or callee uses struct return
5235 // semantics.
5236 auto IsCallerStructRet = Caller.hasStructRetAttr();
5237 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5238 if (IsCallerStructRet || IsCalleeStructRet)
5239 return false;
5240
5241 // Do not tail call opt if either the callee or caller has a byval argument.
5242 for (auto &Arg : Outs)
5243 if (Arg.Flags.isByVal())
5244 return false;
5245
5246 // The callee has to preserve all registers the caller needs to preserve.
5247 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5248 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5249 if (CalleeCC != CallerCC) {
5250 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5251 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5252 return false;
5253 }
5254 return true;
5255 }
5256
5257 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5258 return DAG.getDataLayout().getPrefTypeAlign(
5259 VT.getTypeForEVT(*DAG.getContext()));
5260 }
5261
5262 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5263 // and output parameter nodes.
5264 SDValue
5265 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5266 SmallVectorImpl<SDValue> &InVals) const {
5267 SelectionDAG &DAG = CLI.DAG;
5268 SDLoc &DL = CLI.DL;
5269 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5270 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5271 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5272 SDValue Chain = CLI.Chain;
5273 SDValue Callee = CLI.Callee;
5274 CallingConv::ID CallConv = CLI.CallConv;
5275 bool IsVarArg = CLI.IsVarArg;
5276 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5277 MVT GRLenVT = Subtarget.getGRLenVT();
5278 bool &IsTailCall = CLI.IsTailCall;
5279
5280 MachineFunction &MF = DAG.getMachineFunction();
5281
5282 // Analyze the operands of the call, assigning locations to each operand.
5283 SmallVector<CCValAssign> ArgLocs;
5284 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5285
5286 if (CallConv == CallingConv::GHC)
5287 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5288 else
5289 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5290
5291 // Check if it's really possible to do a tail call.
5292 if (IsTailCall)
5293 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5294
5295 if (IsTailCall)
5296 ++NumTailCalls;
5297 else if (CLI.CB && CLI.CB->isMustTailCall())
5298 report_fatal_error("failed to perform tail call elimination on a call "
5299 "site marked musttail");
5300
5301 // Get a count of how many bytes are to be pushed on the stack.
5302 unsigned NumBytes = ArgCCInfo.getStackSize();
5303
5304 // Create local copies for byval args.
5305 SmallVector<SDValue> ByValArgs;
5306 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5307 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5308 if (!Flags.isByVal())
5309 continue;
5310
5311 SDValue Arg = OutVals[i];
5312 unsigned Size = Flags.getByValSize();
5313 Align Alignment = Flags.getNonZeroByValAlign();
5314
5315 int FI =
5316 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5317 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5318 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5319
5320 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5321 /*IsVolatile=*/false,
5322 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5323 MachinePointerInfo(), MachinePointerInfo());
5324 ByValArgs.push_back(FIPtr);
5325 }
5326
5327 if (!IsTailCall)
5328 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5329
5330 // Copy argument values to their designated locations.
5331 SmallVector<std::pair<Register, SDValue>> RegsToPass;
5332 SmallVector<SDValue> MemOpChains;
5333 SDValue StackPtr;
5334 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5335 CCValAssign &VA = ArgLocs[i];
5336 SDValue ArgValue = OutVals[i];
5337 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5338
5339 // Promote the value if needed.
5340 // For now, only handle fully promoted and indirect arguments.
5341 if (VA.getLocInfo() == CCValAssign::Indirect) {
5342 // Store the argument in a stack slot and pass its address.
5343 Align StackAlign =
5344 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5345 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5346 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5347 // If the original argument was split and passed by reference, we need to
5348 // store the required parts of it here (and pass just one address).
5349 unsigned ArgIndex = Outs[i].OrigArgIndex;
5350 unsigned ArgPartOffset = Outs[i].PartOffset;
5351 assert(ArgPartOffset == 0);
5352 // Calculate the total size to store. We can only determine this by walking
5353 // over the remaining parts in the loop below and collecting their sizes and
5354 // alignments.
5355 SmallVector<std::pair<SDValue, SDValue>> Parts;
5356 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5357 SDValue PartValue = OutVals[i + 1];
5358 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5359 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5360 EVT PartVT = PartValue.getValueType();
5361
5362 StoredSize += PartVT.getStoreSize();
5363 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5364 Parts.push_back(std::make_pair(PartValue, Offset));
5365 ++i;
5366 }
5367 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5368 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5369 MemOpChains.push_back(
5370 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5371 MachinePointerInfo::getFixedStack(MF, FI)));
5372 for (const auto &Part : Parts) {
5373 SDValue PartValue = Part.first;
5374 SDValue PartOffset = Part.second;
5375 SDValue Address =
5376 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5377 MemOpChains.push_back(
5378 DAG.getStore(Chain, DL, PartValue, Address,
5379 MachinePointerInfo::getFixedStack(MF, FI)));
5380 }
5381 ArgValue = SpillSlot;
5382 } else {
5383 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5384 }
5385
5386 // Use local copy if it is a byval arg.
5387 if (Flags.isByVal())
5388 ArgValue = ByValArgs[j++];
5389
5390 if (VA.isRegLoc()) {
5391 // Queue up the argument copies and emit them at the end.
5392 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5393 } else {
5394 assert(VA.isMemLoc() && "Argument not register or memory");
5395 assert(!IsTailCall && "Tail call not allowed if stack is used "
5396 "for passing parameters");
5397
5398 // Work out the address of the stack slot.
5399 if (!StackPtr.getNode())
5400 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5401 SDValue Address =
5402 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5403 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5404
5405 // Emit the store.
5406 MemOpChains.push_back(
5407 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5408 }
5409 }
5410
5411 // Join the stores, which are independent of one another.
5412 if (!MemOpChains.empty())
5413 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5414
5415 SDValue Glue;
5416
5417 // Build a sequence of copy-to-reg nodes, chained and glued together.
5418 for (auto &Reg : RegsToPass) {
5419 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5420 Glue = Chain.getValue(1);
5421 }
5422
5423 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5424 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5425 // split it and then direct call can be matched by PseudoCALL.
5426 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5427 const GlobalValue *GV = S->getGlobal();
5428 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5429 ? LoongArchII::MO_CALL
5430 : LoongArchII::MO_CALL_PLT;
5431 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5432 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5433 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5434 ? LoongArchII::MO_CALL
5435 : LoongArchII::MO_CALL_PLT;
5436 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5437 }
5438
5439 // The first call operand is the chain and the second is the target address.
5440 SmallVector<SDValue> Ops;
5441 Ops.push_back(Chain);
5442 Ops.push_back(Callee);
5443
5444 // Add argument registers to the end of the list so that they are
5445 // known live into the call.
5446 for (auto &Reg : RegsToPass)
5447 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5448
5449 if (!IsTailCall) {
5450 // Add a register mask operand representing the call-preserved registers.
5451 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5452 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5453 assert(Mask && "Missing call preserved mask for calling convention");
5454 Ops.push_back(DAG.getRegisterMask(Mask));
5455 }
5456
5457 // Glue the call to the argument copies, if any.
5458 if (Glue.getNode())
5459 Ops.push_back(Glue);
5460
5461 // Emit the call.
5462 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5463 unsigned Op;
5464 switch (DAG.getTarget().getCodeModel()) {
5465 default:
5466 report_fatal_error("Unsupported code model");
5467 case CodeModel::Small:
5468 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5469 break;
5470 case CodeModel::Medium:
5471 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5472 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5473 break;
5474 case CodeModel::Large:
5475 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5476 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5477 break;
5478 }
5479
5480 if (IsTailCall) {
5481 MF.getFrameInfo().setHasTailCall();
5482 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5483 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5484 return Ret;
5485 }
5486
5487 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5488 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5489 Glue = Chain.getValue(1);
5490
5491 // Mark the end of the call, which is glued to the call itself.
5492 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5493 Glue = Chain.getValue(1);
5494
5495 // Assign locations to each value returned by this call.
5496 SmallVector<CCValAssign> RVLocs;
5497 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5498 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5499
5500 // Copy all of the result registers out of their specified physreg.
5501 for (auto &VA : RVLocs) {
5502 // Copy the value out.
5503 SDValue RetValue =
5504 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5505 // Glue the RetValue to the end of the call sequence.
5506 Chain = RetValue.getValue(1);
5507 Glue = RetValue.getValue(2);
5508
5509 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5510
5511 InVals.push_back(RetValue);
5512 }
5513
5514 return Chain;
5515 }
5516
5517 bool LoongArchTargetLowering::CanLowerReturn(
5518 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5519 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5520 SmallVector<CCValAssign> RVLocs;
5521 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5522
5523 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5524 LoongArchABI::ABI ABI =
5525 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5526 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5527 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5528 nullptr))
5529 return false;
5530 }
5531 return true;
5532 }
5533
5534 SDValue LoongArchTargetLowering::LowerReturn(
5535 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5536 const SmallVectorImpl<ISD::OutputArg> &Outs,
5537 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5538 SelectionDAG &DAG) const {
5539 // Stores the assignment of the return value to a location.
5540 SmallVector<CCValAssign> RVLocs;
5541
5542 // Info about the registers and stack slot.
5543 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5544 *DAG.getContext());
5545
5546 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5547 nullptr, CC_LoongArch);
5548 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5549 report_fatal_error("GHC functions return void only");
5550 SDValue Glue;
5551 SmallVector<SDValue, 4> RetOps(1, Chain);
5552
5553 // Copy the result values into the output registers.
5554 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5555 CCValAssign &VA = RVLocs[i];
5556 assert(VA.isRegLoc() && "Can only return in registers!");
5557
5558 // Handle a 'normal' return.
5559 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5560 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5561
5562 // Guarantee that all emitted copies are stuck together.
5563 Glue = Chain.getValue(1);
5564 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5565 }
5566
5567 RetOps[0] = Chain; // Update chain.
5568
5569 // Add the glue node if we have it.
5570 if (Glue.getNode())
5571 RetOps.push_back(Glue);
5572
5573 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5574 }
5575
5576 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5577 bool ForCodeSize) const {
5578 // TODO: Maybe need more checks here after vector extension is supported.
5579 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5580 return false;
5581 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5582 return false;
5583 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
5584 }
5585
5586 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
5587 return true;
5588 }
5589
5590 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
5591 return true;
5592 }

bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  if (!Subtarget.is64Bit())
    return isa<LoadInst>(I) || isa<StoreInst>(I);

  if (isa<LoadInst>(I))
    return true;

  // On LA64, atomic store operations with an integer bit width of 32 or 64 do
  // not require fences because we can use amswap_db.[w/d].
  Type *Ty = I->getOperand(0)->getType();
  if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
    unsigned Size = Ty->getIntegerBitWidth();
    return (Size == 8 || Size == 16);
  }

  return false;
}

EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                LLVMContext &Context,
                                                EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
  // TODO: Support vectors.
  return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
}

bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                 const CallInst &I,
                                                 MachineFunction &MF,
                                                 unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
    // TODO: Add more Intrinsics later.
  }
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // TODO: Add more AtomicRMWInst operations that need to be expanded.

  // Since floating-point operations require a non-trivial set of data
  // operations, use CmpXChg to expand them.
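  // For example, an `atomicrmw fadd` is rewritten by AtomicExpandPass into a
  // load, a floating-point add and a cmpxchg retry loop.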
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

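  // 8- and 16-bit atomicrmw operations are expanded to the
  // loongarch_masked_atomicrmw_* intrinsics (lowered in
  // emitMaskedAtomicRMWIntrinsic below), which update the containing aligned
  // word under a mask.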
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
                                    AtomicRMWInst::BinOp BinOp) {
  if (GRLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::loongarch_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::loongarch_masked_atomicrmw_min_i64;
      // TODO: support other AtomicRMWInst.
    }
  }

  if (GRLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
      // TODO: support other AtomicRMWInst.
    }
  }

  llvm_unreachable("Unexpected GRLen\n");
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  AtomicOrdering FailOrd = CI->getFailureOrdering();
  Value *FailureOrdering =
      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));

  // TODO: Support cmpxchg on LA32.
  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LL/SC loop emitted by
  // int_loongarch_masked_atomicrmw_xchg.
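  // For example, an `atomicrmw xchg` of 0 becomes an atomic AND with ~Mask
  // (clearing the target field), and an xchg of -1 becomes an atomic OR with
  // Mask (setting it).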
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
                                     Builder.CreateNot(Mask, "Inv_Mask"),
                                     AI->getAlign(), Ord);
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
                                     AI->getAlign(), Ord);
  }

  unsigned GRLen = Subtarget.getGRLen();
  Value *Ordering =
      Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LlwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);

  if (GRLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
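  // For example, with GRLen == 64, an i8 field at ShiftAmt == 16 gets
  // SextShamt == 64 - 8 - 16 == 40: shifting left by 40 and then arithmetic
  // shifting right by 40 sign-extends the field before the comparison.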
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LlwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (GRLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

Register LoongArchTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R4;
}

Register LoongArchTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//

LoongArchTargetLowering::ConstraintType
LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
  // LoongArch specific constraints in GCC: config/loongarch/constraints.md
  //
  // 'f': A floating-point register (if available).
  // 'k': A memory operand whose address is formed by a base register and
  //      (optionally scaled) index register.
  // 'l': A signed 16-bit constant.
  // 'm': A memory operand whose address is formed by a base register and
  //      offset that is suitable for use in instructions with the same
  //      addressing mode as st.w and ld.w.
  // 'I': A signed 12-bit constant (for arithmetic instructions).
  // 'J': Integer zero.
  // 'K': An unsigned 12-bit constant (for logic instructions).
  // "ZB": An address that is held in a general-purpose register. The offset is
  //       zero.
  // "ZC": A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as ll.w and sc.w.
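  //
  // Illustrative (hypothetical) uses of the immediate constraints from C code:
  //   asm ("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(-100)); // si12
  //   asm ("ori %0, %1, %2" : "=r"(res) : "r"(a), "K"(0xfff));   // ui12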
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'l':
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'k':
      return C_Memory;
    }
  }

  if (Constraint == "ZC" || Constraint == "ZB")
    return C_Memory;

  // 'm' is handled here.
  return TargetLowering::getConstraintType(Constraint);
}

InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
    StringRef ConstraintCode) const {
  return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
      .Case("k", InlineAsm::ConstraintCode::k)
      .Case("ZB", InlineAsm::ConstraintCode::ZB)
      .Case("ZC", InlineAsm::ConstraintCode::ZC)
      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}

std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a LoongArch
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &LoongArch::GPRRegClass);
    case 'f':
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(0U, &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(0U, &LoongArch::FPR64RegClass);
      if (Subtarget.hasExtLSX() &&
          TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
        return std::make_pair(0U, &LoongArch::LSX128RegClass);
      if (Subtarget.hasExtLASX() &&
          TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
        return std::make_pair(0U, &LoongArch::LASX256RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints while the official register name is prefixed with a '$'. So we
  // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
  // before it is parsed. TargetLowering::getRegForInlineAsmConstraint is also
  // case insensitive, so there is no need to convert the constraint to upper
  // case here.
  //
  // For now, there is no need to support ABI names (e.g. `$a0`), as clang
  // correctly decodes register name aliases into their official names and, as
  // far as we know, the not-yet-upstreamed `rustc` for LoongArch always uses
  // official register names.
  if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
      Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, join_items("", Temp.first, Temp.second), VT);
    // Match those names to the widest floating-point register type available.
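    // For example, the constraint "{$f3}" with an f64 (or unknown) value type
    // maps F3 to F3_64 below, so the full 64-bit register is used.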
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently, only length-1 constraints are supported.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

#define GET_REGISTER_MATCHER
#include "LoongArchGenAsmMatcher.inc"

Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
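    // For example, x * 9 == (x << 3) + x, which can be selected as a single
    // alsl.[wd] instruction.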
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
    // immediate has two set bits. Or break (MUL x, imm) into
    // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
    // (1 << s0) - (1 << s1).
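    // For example, with Imm == 4160 == (1 << 12) + (1 << 6): Shifts == 6,
    // ImmSmall == 64 and Imm - ImmSmall == 4096 is a power of 2, so
    // x * 4160 becomes (ADD (SLLI x, 12), (SLLI x, 6)).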
    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
      if (Shifts >= 12)
        return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than the other three cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}

bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks once the vector extensions are supported.
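  // For illustration, modes 2-4 correspond to e.g. the ld.w (reg + si12),
  // ldptr.w (reg + si14 << 2) and ldx.w (reg1 + reg2) load forms.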

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by
  // 2 with the `UAL` feature.
  if (!isInt<12>(AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    return false;
  }

  return true;
}

bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
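  // For example, (zext (load i8)) can be selected as a single ld.bu and
  // (zext (load i16)) as a single ld.hu, so no separate extend is needed.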
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  return !isa<ConstantSDNode>(Y);
}

ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
  // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
  return ISD::SIGN_EXTEND;
}

bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
    EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is a float narrower than GRLen on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getGRLen()))
    return false;
  return true;
}
