xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the Machinelegalizer class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64LegalizerInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18 #include "llvm/CodeGen/GlobalISel/Utils.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetOpcodes.h"
22 #include "llvm/CodeGen/ValueTypes.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/Type.h"
25 
26 #define DEBUG_TYPE "aarch64-legalinfo"
27 
28 using namespace llvm;
29 using namespace LegalizeActions;
30 using namespace LegalizeMutations;
31 using namespace LegalityPredicates;
32 
// Builds the AArch64 legalization rule tables for GlobalISel. Each
// getActionDefinitionsBuilder(...) chain declares, per opcode, which types are
// legal, which get widened/narrowed/scalarized, and which fall to custom code
// (legalizeCustom below). Rule order within a chain matters: earlier rules win.
// computeTables() at the end freezes the tables.
33 AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
34     : ST(&ST) {
35   using namespace TargetOpcode;
36   const LLT p0 = LLT::pointer(0, 64);
37   const LLT s1 = LLT::scalar(1);
38   const LLT s8 = LLT::scalar(8);
39   const LLT s16 = LLT::scalar(16);
40   const LLT s32 = LLT::scalar(32);
41   const LLT s64 = LLT::scalar(64);
42   const LLT s128 = LLT::scalar(128);
43   const LLT s256 = LLT::scalar(256);
44   const LLT s512 = LLT::scalar(512);
45   const LLT v16s8 = LLT::vector(16, 8);
46   const LLT v8s8 = LLT::vector(8, 8);
47   const LLT v4s8 = LLT::vector(4, 8);
48   const LLT v8s16 = LLT::vector(8, 16);
49   const LLT v4s16 = LLT::vector(4, 16);
50   const LLT v2s16 = LLT::vector(2, 16);
51   const LLT v2s32 = LLT::vector(2, 32);
52   const LLT v4s32 = LLT::vector(4, 32);
53   const LLT v2s64 = LLT::vector(2, 64);
54   const LLT v2p0 = LLT::vector(2, p0);
55 
56   const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
57 
58   // FIXME: support subtargets which have neon/fp-armv8 disabled.
59   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
60     computeTables();
61     return;
62   }
63 
// G_IMPLICIT_DEF/G_FREEZE: vectors other than the listed shapes (and v2s64)
// are broken down to scalars by the fewerElementsIf rule below.
64   getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
65     .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
66     .clampScalar(0, s1, s64)
67     .widenScalarToNextPow2(0, 8)
68     .fewerElementsIf(
69       [=](const LegalityQuery &Query) {
70         return Query.Types[0].isVector() &&
71           (Query.Types[0].getElementType() != s64 ||
72            Query.Types[0].getNumElements() != 2);
73       },
74       [=](const LegalityQuery &Query) {
75         LLT EltTy = Query.Types[0].getElementType();
76         if (EltTy == s64)
77           return std::make_pair(0, LLT::vector(2, 64));
78         return std::make_pair(0, EltTy);
79       });
80 
81   getActionDefinitionsBuilder(G_PHI)
82       .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
83       .clampScalar(0, s16, s64)
84       .widenScalarToNextPow2(0);
85 
86   getActionDefinitionsBuilder(G_BSWAP)
87       .legalFor({s32, s64, v4s32, v2s32, v2s64})
88       .clampScalar(0, s32, s64)
89       .widenScalarToNextPow2(0);
90 
91   getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
92       .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
93       .clampScalar(0, s32, s64)
94       .widenScalarToNextPow2(0)
95       .clampNumElements(0, v2s32, v4s32)
96       .clampNumElements(0, v2s64, v2s64)
97       .moreElementsToNextPow2(0);
98 
// Scalar 32-bit shifts go to custom code (legalizeShlAshrLshr), which widens
// constant shift amounts to 64-bit immediates for the imported patterns.
99   getActionDefinitionsBuilder(G_SHL)
100       .customIf([=](const LegalityQuery &Query) {
101         const auto &SrcTy = Query.Types[0];
102         const auto &AmtTy = Query.Types[1];
103         return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
104                AmtTy.getSizeInBits() == 32;
105       })
106       .legalFor({{s32, s32},
107                  {s64, s64},
108                  {s32, s64},
109                  {v2s32, v2s32},
110                  {v4s32, v4s32},
111                  {v2s64, v2s64}})
112       .clampScalar(1, s32, s64)
113       .clampScalar(0, s32, s64)
114       .widenScalarToNextPow2(0)
115       .clampNumElements(0, v2s32, v4s32)
116       .clampNumElements(0, v2s64, v2s64)
117       .moreElementsToNextPow2(0)
118       .minScalarSameAs(1, 0);
119 
120   getActionDefinitionsBuilder(G_PTR_ADD)
121       .legalFor({{p0, s64}, {v2p0, v2s64}})
122       .clampScalar(1, s64, s64);
123 
124   getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
125 
126   getActionDefinitionsBuilder({G_SDIV, G_UDIV})
127       .legalFor({s32, s64})
128       .libcallFor({s128})
129       .clampScalar(0, s32, s64)
130       .widenScalarToNextPow2(0)
131       .scalarize(0);
132 
133   getActionDefinitionsBuilder({G_LSHR, G_ASHR})
134       .customIf([=](const LegalityQuery &Query) {
135         const auto &SrcTy = Query.Types[0];
136         const auto &AmtTy = Query.Types[1];
137         return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
138                AmtTy.getSizeInBits() == 32;
139       })
140       .legalFor({{s32, s32},
141                  {s32, s64},
142                  {s64, s64},
143                  {v2s32, v2s32},
144                  {v4s32, v4s32},
145                  {v2s64, v2s64}})
146       .clampScalar(1, s32, s64)
147       .clampScalar(0, s32, s64)
148       .minScalarSameAs(1, 0);
149 
// Remainders are lowered to div + mul + sub.
150   getActionDefinitionsBuilder({G_SREM, G_UREM})
151       .lowerFor({s1, s8, s16, s32, s64});
152 
153   getActionDefinitionsBuilder({G_SMULO, G_UMULO})
154       .lowerFor({{s64, s1}});
155 
156   getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
157 
158   getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
159       .legalFor({{s32, s1}, {s64, s1}})
160       .minScalar(0, s32);
161 
162   getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
163     .legalFor({s32, s64, v2s64, v4s32, v2s32});
164 
165   getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
166 
167   getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
168                                G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
169                                G_FNEARBYINT})
170       // If we don't have full FP16 support, then scalarize the elements of
171       // vectors containing fp16 types.
172       .fewerElementsIf(
173           [=, &ST](const LegalityQuery &Query) {
174             const auto &Ty = Query.Types[0];
175             return Ty.isVector() && Ty.getElementType() == s16 &&
176                    !ST.hasFullFP16();
177           },
178           [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
179       // If we don't have full FP16 support, then widen s16 to s32 if we
180       // encounter it.
181       .widenScalarIf(
182           [=, &ST](const LegalityQuery &Query) {
183             return Query.Types[0] == s16 && !ST.hasFullFP16();
184           },
185           [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
186       .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
187 
188   getActionDefinitionsBuilder(
189       {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
190       // We need a call for these, so we always need to scalarize.
191       .scalarize(0)
192       // Regardless of FP16 support, widen 16-bit elements to 32-bits.
193       .minScalar(0, s32)
194       .libcallFor({s32, s64, v2s32, v4s32, v2s64})
195 
196   getActionDefinitionsBuilder(G_INSERT)
197       .unsupportedIf([=](const LegalityQuery &Query) {
198         return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
199       })
200       .legalIf([=](const LegalityQuery &Query) {
201         const LLT &Ty0 = Query.Types[0];
202         const LLT &Ty1 = Query.Types[1];
203         if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
204           return false;
205         return isPowerOf2_32(Ty1.getSizeInBits()) &&
206                (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
207       })
208       .clampScalar(0, s32, s64)
209       .widenScalarToNextPow2(0)
210       .maxScalarIf(typeInSet(0, {s32}), 1, s16)
211       .maxScalarIf(typeInSet(0, {s64}), 1, s32)
212       .widenScalarToNextPow2(1);
213 
214   getActionDefinitionsBuilder(G_EXTRACT)
215       .unsupportedIf([=](const LegalityQuery &Query) {
216         return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
217       })
218       .legalIf([=](const LegalityQuery &Query) {
219         const LLT &Ty0 = Query.Types[0];
220         const LLT &Ty1 = Query.Types[1];
221         if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
222           return false;
// NOTE(review): this p0 check looks unreachable — the test above has already
// rejected anything that is not s32/s64/s128, and p0 is none of those.
223         if (Ty1 == p0)
224           return true;
225         return isPowerOf2_32(Ty0.getSizeInBits()) &&
226                (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
227       })
228       .clampScalar(1, s32, s128)
229       .widenScalarToNextPow2(1)
230       .maxScalarIf(typeInSet(1, {s32}), 0, s16)
231       .maxScalarIf(typeInSet(1, {s64}), 0, s32)
232       .widenScalarToNextPow2(0);
233 
234   getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
235       .legalForTypesWithMemDesc({{s32, p0, 8, 8},
236                                  {s32, p0, 16, 8},
237                                  {s32, p0, 32, 8},
238                                  {s64, p0, 8, 2},
239                                  {s64, p0, 16, 2},
240                                  {s64, p0, 32, 4},
241                                  {s64, p0, 64, 8},
242                                  {p0, p0, 64, 8},
243                                  {v2s32, p0, 64, 8}})
244       .clampScalar(0, s32, s64)
245       .widenScalarToNextPow2(0)
246       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
247       //       how to do that yet.
248       .unsupportedIfMemSizeNotPow2()
249       // Lower anything left over into G_*EXT and G_LOAD
250       .lower();
251 
// Shared predicate: value type is a vector of address-space-0 pointers.
// Such loads/stores go to legalizeLoadStore, which bitcasts to s64 elements.
252   auto IsPtrVecPred = [=](const LegalityQuery &Query) {
253     const LLT &ValTy = Query.Types[0];
254     if (!ValTy.isVector())
255       return false;
256     const LLT EltTy = ValTy.getElementType();
257     return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
258   };
259 
260   getActionDefinitionsBuilder(G_LOAD)
261       .legalForTypesWithMemDesc({{s8, p0, 8, 8},
262                                  {s16, p0, 16, 8},
263                                  {s32, p0, 32, 8},
264                                  {s64, p0, 64, 8},
265                                  {p0, p0, 64, 8},
266                                  {s128, p0, 128, 8},
267                                  {v8s8, p0, 64, 8},
268                                  {v16s8, p0, 128, 8},
269                                  {v4s16, p0, 64, 8},
270                                  {v8s16, p0, 128, 8},
271                                  {v2s32, p0, 64, 8},
272                                  {v4s32, p0, 128, 8},
273                                  {v2s64, p0, 128, 8}})
274       // These extends are also legal
275       .legalForTypesWithMemDesc({{s32, p0, 8, 8},
276                                  {s32, p0, 16, 8}})
277       .clampScalar(0, s8, s64)
278       .lowerIfMemSizeNotPow2()
279       // Lower any any-extending loads left into G_ANYEXT and G_LOAD
280       .lowerIf([=](const LegalityQuery &Query) {
281         return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
282       })
283       .widenScalarToNextPow2(0)
284       .clampMaxNumElements(0, s32, 2)
285       .clampMaxNumElements(0, s64, 1)
286       .customIf(IsPtrVecPred);
287 
288   getActionDefinitionsBuilder(G_STORE)
289       .legalForTypesWithMemDesc({{s8, p0, 8, 8},
290                                  {s16, p0, 16, 8},
291                                  {s32, p0, 8, 8},
292                                  {s32, p0, 16, 8},
293                                  {s32, p0, 32, 8},
294                                  {s64, p0, 64, 8},
295                                  {p0, p0, 64, 8},
296                                  {s128, p0, 128, 8},
297                                  {v16s8, p0, 128, 8},
298                                  {v4s16, p0, 64, 8},
299                                  {v8s16, p0, 128, 8},
300                                  {v2s32, p0, 64, 8},
301                                  {v4s32, p0, 128, 8},
302                                  {v2s64, p0, 128, 8}})
303       .clampScalar(0, s8, s64)
304       .lowerIfMemSizeNotPow2()
305       .lowerIf([=](const LegalityQuery &Query) {
306         return Query.Types[0].isScalar() &&
307                Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
308       })
309       .clampMaxNumElements(0, s32, 2)
310       .clampMaxNumElements(0, s64, 1)
311       .customIf(IsPtrVecPred);
312 
313   // Constants
314   getActionDefinitionsBuilder(G_CONSTANT)
315     .legalFor({p0, s8, s16, s32, s64})
316       .clampScalar(0, s8, s64)
317       .widenScalarToNextPow2(0);
318   getActionDefinitionsBuilder(G_FCONSTANT)
319       .legalFor({s32, s64})
320       .clampScalar(0, s32, s64);
321 
// G_ICMP: result (type 0) is clamped to s32 for scalars; vector results must
// match the element size of the compared operands (minScalarEltSameAsIf).
322   getActionDefinitionsBuilder(G_ICMP)
323       .legalFor({{s32, s32},
324                  {s32, s64},
325                  {s32, p0},
326                  {v4s32, v4s32},
327                  {v2s32, v2s32},
328                  {v2s64, v2s64},
329                  {v2s64, v2p0},
330                  {v4s16, v4s16},
331                  {v8s16, v8s16},
332                  {v8s8, v8s8},
333                  {v16s8, v16s8}})
334       .clampScalar(1, s32, s64)
335       .clampScalar(0, s32, s32)
336       .minScalarEltSameAsIf(
337           [=](const LegalityQuery &Query) {
338             const LLT &Ty = Query.Types[0];
339             const LLT &SrcTy = Query.Types[1];
340             return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
341                    Ty.getElementType() != SrcTy.getElementType();
342           },
343           0, 1)
344       .minScalarOrEltIf(
345           [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
346           1, s32)
347       .minScalarOrEltIf(
348           [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
349           s64)
350       .widenScalarOrEltToNextPow2(1);
351 
352   getActionDefinitionsBuilder(G_FCMP)
353       .legalFor({{s32, s32}, {s32, s64}})
354       .clampScalar(0, s32, s32)
355       .clampScalar(1, s32, s64)
356       .widenScalarToNextPow2(1);
357 
358   // Extensions
359   auto ExtLegalFunc = [=](const LegalityQuery &Query) {
360     unsigned DstSize = Query.Types[0].getSizeInBits();
361 
362     if (DstSize == 128 && !Query.Types[0].isVector())
363       return false; // Extending to a scalar s128 needs narrowing.
364 
365     // Make sure that we have something that will fit in a register, and
366     // make sure it's a power of 2.
367     if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
368       return false;
369 
370     const LLT &SrcTy = Query.Types[1];
371 
372     // Special case for s1.
373     if (SrcTy == s1)
374       return true;
375 
376     // Make sure we fit in a register otherwise. Don't bother checking that
377     // the source type is below 128 bits. We shouldn't be allowing anything
378     // through which is wider than the destination in the first place.
379     unsigned SrcSize = SrcTy.getSizeInBits();
380     if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
381       return false;
382 
383     return true;
384   };
385   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
386       .legalIf(ExtLegalFunc)
387       .clampScalar(0, s64, s64); // Just for s128, others are handled above.
388 
389   getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
390 
391   getActionDefinitionsBuilder(G_SEXT_INREG)
392     .legalFor({s32, s64})
393     .lower();
394 
395   // FP conversions
396   getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
397       {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
398   getActionDefinitionsBuilder(G_FPEXT).legalFor(
399       {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
400 
401   // Conversions
402   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
403       .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
404       .clampScalar(0, s32, s64)
405       .widenScalarToNextPow2(0)
406       .clampScalar(1, s32, s64)
407       .widenScalarToNextPow2(1);
408 
409   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
410       .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
411       .clampScalar(1, s32, s64)
412       .widenScalarToNextPow2(1)
413       .clampScalar(0, s32, s64)
414       .widenScalarToNextPow2(0);
415 
416   // Control-flow
417   getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
418   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
419 
420   // Select
421   // FIXME: We can probably do a bit better than just scalarizing vector
422   // selects.
423   getActionDefinitionsBuilder(G_SELECT)
424       .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
425       .clampScalar(0, s32, s64)
426       .widenScalarToNextPow2(0)
427       .scalarize(0);
428 
429   // Pointer-handling
430   getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
431 
// Small code model: G_GLOBAL_VALUE is expanded to ADRP + G_ADD_LOW by
// legalizeSmallCMGlobalValue so the low part can fold into addressing modes.
432   if (TM.getCodeModel() == CodeModel::Small)
433     getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
434   else
435     getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
436 
437   getActionDefinitionsBuilder(G_PTRTOINT)
438       .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
439       .maxScalar(0, s64)
440       .widenScalarToNextPow2(0, /*Min*/ 8);
441 
442   getActionDefinitionsBuilder(G_INTTOPTR)
443       .unsupportedIf([&](const LegalityQuery &Query) {
444         return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
445       })
446       .legalFor({{p0, s64}});
447 
448   // Casts for 32 and 64-bit width type are just copies.
449   // Same for 128-bit width type, except they are on the FPR bank.
450   getActionDefinitionsBuilder(G_BITCAST)
451       // FIXME: This is wrong since G_BITCAST is not allowed to change the
452       // number of bits but it's what the previous code described and fixing
453       // it breaks tests.
454       .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
455                                  v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
456                                  v2p0});
457 
458   getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
459 
460   // va_list must be a pointer, but most sized types are pretty easy to handle
461   // as the destination.
462   getActionDefinitionsBuilder(G_VAARG)
463       .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
464       .clampScalar(0, s8, s64)
465       .widenScalarToNextPow2(0, /*Min*/ 8);
466 
// LSE atomics: cmpxchg-with-success is lowered to plain cmpxchg + compare;
// the RMW family maps directly onto the LSE instructions.
467   if (ST.hasLSE()) {
468     getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
469         .lowerIf(all(
470             typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
471             atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
472 
473     getActionDefinitionsBuilder(
474         {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
475          G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
476          G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
477         .legalIf(all(
478             typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
479             atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
480   }
481 
482   getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
483 
484   // Merge/Unmerge
485   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
486     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
487     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
488 
489     auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
490       const LLT &Ty = Query.Types[TypeIdx];
491       if (Ty.isVector()) {
492         const LLT &EltTy = Ty.getElementType();
493         if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
494           return true;
495         if (!isPowerOf2_32(EltTy.getSizeInBits()))
496           return true;
497       }
498       return false;
499     };
500 
501     // FIXME: This rule is horrible, but specifies the same as what we had
502     // before with the particularly strange definitions removed (e.g.
503     // s8 = G_MERGE_VALUES s32, s32).
504     // Part of the complexity comes from these ops being extremely flexible. For
505     // example, you can build/decompose vectors with it, concatenate vectors,
506     // etc. and in addition to this you can also bitcast with it at the same
507     // time. We've been considering breaking it up into multiple ops to make it
508     // more manageable throughout the backend.
509     getActionDefinitionsBuilder(Op)
510         // Break up vectors with weird elements into scalars
511         .fewerElementsIf(
512             [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
513             scalarize(0))
514         .fewerElementsIf(
515             [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
516             scalarize(1))
517         // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
518         // or 384.
519         .clampScalar(BigTyIdx, s8, s512)
520         .widenScalarIf(
521             [=](const LegalityQuery &Query) {
522               const LLT &Ty = Query.Types[BigTyIdx];
523               return !isPowerOf2_32(Ty.getSizeInBits()) &&
524                      Ty.getSizeInBits() % 64 != 0;
525             },
526             [=](const LegalityQuery &Query) {
527               // Pick the next power of 2, or a multiple of 64 over 128.
528               // Whichever is smaller.
529               const LLT &Ty = Query.Types[BigTyIdx];
530               unsigned NewSizeInBits = 1
531                                        << Log2_32_Ceil(Ty.getSizeInBits() + 1);
532               if (NewSizeInBits >= 256) {
533                 unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
534                 if (RoundedTo < NewSizeInBits)
535                   NewSizeInBits = RoundedTo;
536               }
537               return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
538             })
539         // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
540         // worth considering the multiples of 64 since 2*192 and 2*384 are not
541         // valid.
542         .clampScalar(LitTyIdx, s8, s256)
543         .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
544         // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
545         // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
546         // At this point it's simple enough to accept the legal types.
547         .legalIf([=](const LegalityQuery &Query) {
548           const LLT &BigTy = Query.Types[BigTyIdx];
549           const LLT &LitTy = Query.Types[LitTyIdx];
550           if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
551             return false;
552           if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
553             return false;
554           return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
555         })
556         // Any vectors left are the wrong size. Scalarize them.
557       .scalarize(0)
558       .scalarize(1);
559   }
560 
561   getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
562       .unsupportedIf([=](const LegalityQuery &Query) {
563         const LLT &EltTy = Query.Types[1].getElementType();
564         return Query.Types[0] != EltTy;
565       })
566       .minScalar(2, s64)
567       .legalIf([=](const LegalityQuery &Query) {
568         const LLT &VecTy = Query.Types[1];
569         return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
570                VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
571       });
572 
573   getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
574       .legalIf([=](const LegalityQuery &Query) {
575         const LLT &VecTy = Query.Types[0];
576         // TODO: Support s8 and s16
577         return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
578       });
579 
580   getActionDefinitionsBuilder(G_BUILD_VECTOR)
581       .legalFor({{v4s16, s16},
582                  {v8s16, s16},
583                  {v2s32, s32},
584                  {v4s32, s32},
585                  {v2p0, p0},
586                  {v2s64, s64}})
587       .clampNumElements(0, v4s32, v4s32)
588       .clampNumElements(0, v2s64, v2s64)
589 
590       // Deal with larger scalar types, which will be implicitly truncated.
591       .legalIf([=](const LegalityQuery &Query) {
592         return Query.Types[0].getScalarSizeInBits() <
593                Query.Types[1].getSizeInBits();
594       })
595       .minScalarSameAs(1, 0);
596 
597   getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
598       {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
599       .scalarize(1);
600 
601   getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
602       .legalIf([=](const LegalityQuery &Query) {
603         const LLT &DstTy = Query.Types[0];
604         const LLT &SrcTy = Query.Types[1];
605         // For now just support the TBL2 variant which needs the source vectors
606         // to be the same size as the dest.
607         if (DstTy != SrcTy)
608           return false;
609         for (auto &Ty : {v2s32, v4s32, v2s64}) {
610           if (DstTy == Ty)
611             return true;
612         }
613         return false;
614       })
615       // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
616       // just want those lowered into G_BUILD_VECTOR
617       .lowerIf([=](const LegalityQuery &Query) {
618         return !Query.Types[1].isVector();
619       })
620       .clampNumElements(0, v4s32, v4s32)
621       .clampNumElements(0, v2s64, v2s64);
622 
623   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
624       .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
625 
626   getActionDefinitionsBuilder(G_JUMP_TABLE)
627     .legalFor({{p0}, {s64}});
628 
629   getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
630     return Query.Types[0] == p0 && Query.Types[1] == s64;
631   });
632 
633   getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
634 
// Freeze the rule tables; verify() checks rule coverage in asserts builds.
635   computeTables();
636   verify(*ST.getInstrInfo());
637 }
638 
639 bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
640                                           MachineInstr &MI) const {
641   MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
642   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
643   GISelChangeObserver &Observer = Helper.Observer;
644   switch (MI.getOpcode()) {
645   default:
646     // No idea what to do.
647     return false;
648   case TargetOpcode::G_VAARG:
649     return legalizeVaArg(MI, MRI, MIRBuilder);
650   case TargetOpcode::G_LOAD:
651   case TargetOpcode::G_STORE:
652     return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
653   case TargetOpcode::G_SHL:
654   case TargetOpcode::G_ASHR:
655   case TargetOpcode::G_LSHR:
656     return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
657   case TargetOpcode::G_GLOBAL_VALUE:
658     return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
659   }
660 
661   llvm_unreachable("expected switch to return");
662 }
663 
664 bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI,
665                                                       MachineRegisterInfo &MRI,
666                                                       MachineIRBuilder &MIRBuilder,
667                                                       GISelChangeObserver &Observer) const {
668   assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
669   // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
670   // G_ADD_LOW instructions.
671   // By splitting this here, we can optimize accesses in the small code model by
672   // folding in the G_ADD_LOW into the load/store offset.
673   auto GV = MI.getOperand(1).getGlobal();
674   if (GV->isThreadLocal())
675     return true; // Don't want to modify TLS vars.
676 
677   auto &TM = ST->getTargetLowering()->getTargetMachine();
678   unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
679 
680   if (OpFlags & AArch64II::MO_GOT)
681     return true;
682 
683   Register DstReg = MI.getOperand(0).getReg();
684   auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
685                   .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
686   // Set the regclass on the dest reg too.
687   MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
688 
689   MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
690       .addGlobalAddress(GV, 0,
691                         OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
692   MI.eraseFromParent();
693   return true;
694 }
695 
696 bool AArch64LegalizerInfo::legalizeIntrinsic(
697   LegalizerHelper &Helper, MachineInstr &MI) const {
698   MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
699   switch (MI.getIntrinsicID()) {
700   case Intrinsic::memcpy:
701   case Intrinsic::memset:
702   case Intrinsic::memmove:
703     if (createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI) ==
704         LegalizerHelper::UnableToLegalize)
705       return false;
706     MI.eraseFromParent();
707     return true;
708   default:
709     break;
710   }
711   return true;
712 }
713 
714 bool AArch64LegalizerInfo::legalizeShlAshrLshr(
715     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
716     GISelChangeObserver &Observer) const {
717   assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
718          MI.getOpcode() == TargetOpcode::G_LSHR ||
719          MI.getOpcode() == TargetOpcode::G_SHL);
720   // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
721   // imported patterns can select it later. Either way, it will be legal.
722   Register AmtReg = MI.getOperand(2).getReg();
723   auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
724   if (!VRegAndVal)
725     return true;
726   // Check the shift amount is in range for an immediate form.
727   int64_t Amount = VRegAndVal->Value;
728   if (Amount > 31)
729     return true; // This will have to remain a register variant.
730   auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
731   MI.getOperand(2).setReg(ExtCst.getReg(0));
732   return true;
733 }
734 
735 bool AArch64LegalizerInfo::legalizeLoadStore(
736     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
737     GISelChangeObserver &Observer) const {
738   assert(MI.getOpcode() == TargetOpcode::G_STORE ||
739          MI.getOpcode() == TargetOpcode::G_LOAD);
740   // Here we just try to handle vector loads/stores where our value type might
741   // have pointer elements, which the SelectionDAG importer can't handle. To
742   // allow the existing patterns for s64 to fire for p0, we just try to bitcast
743   // the value to use s64 types.
744 
745   // Custom legalization requires the instruction, if not deleted, must be fully
746   // legalized. In order to allow further legalization of the inst, we create
747   // a new instruction and erase the existing one.
748 
749   Register ValReg = MI.getOperand(0).getReg();
750   const LLT ValTy = MRI.getType(ValReg);
751 
752   if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
753       ValTy.getElementType().getAddressSpace() != 0) {
754     LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
755     return false;
756   }
757 
758   unsigned PtrSize = ValTy.getElementType().getSizeInBits();
759   const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
760   auto &MMO = **MI.memoperands_begin();
761   if (MI.getOpcode() == TargetOpcode::G_STORE) {
762     auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
763     MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
764   } else {
765     auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
766     MIRBuilder.buildBitcast(ValReg, NewLoad);
767   }
768   MI.eraseFromParent();
769   return true;
770 }
771 
// Lower G_VAARG against the AArch64 (AAPCS stack-based) va_list, which here is
// a single pointer into the argument save area:
//   1. load the current list pointer,
//   2. realign it upward if the requested alignment exceeds pointer alignment,
//   3. load the value from the (possibly realigned) slot,
//   4. advance the pointer by the slot size and store it back.
// NOTE(review): this assumes the single-pointer va_list layout — TODO confirm
// it is only reached for targets/ABIs where that holds (e.g. Darwin).
772 bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
773                                          MachineRegisterInfo &MRI,
774                                          MachineIRBuilder &MIRBuilder) const {
775   MachineFunction &MF = MIRBuilder.getMF();
776   Align Alignment(MI.getOperand(2).getImm());  // required alignment (imm operand)
777   Register Dst = MI.getOperand(0).getReg();    // result of the va_arg
778   Register ListPtr = MI.getOperand(1).getReg();  // pointer to the va_list slot
779 
780   LLT PtrTy = MRI.getType(ListPtr);
781   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
782 
783   const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
784   const Align PtrAlign = Align(PtrSize);
// Step 1: load the current argument pointer out of the va_list.
785   auto List = MIRBuilder.buildLoad(
786       PtrTy, ListPtr,
787       *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
788                                PtrSize, PtrAlign));
789 
790   MachineInstrBuilder DstPtr;
791   if (Alignment > PtrAlign) {
792     // Realign the list to the actual required alignment.
// Round up: add (align - 1) then mask off the low bits.
793     auto AlignMinus1 =
794         MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
795     auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
796     DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
797   } else
798     DstPtr = List;
799 
// Step 3: load the argument value itself from the realigned slot.
800   uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
801   MIRBuilder.buildLoad(
802       Dst, DstPtr,
803       *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
804                                ValSize, std::max(Alignment, PtrAlign)));
805 
// Step 4: bump the pointer past the slot (rounded up to pointer alignment)
// and store it back so the next va_arg sees the updated position.
806   auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
807 
808   auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
809 
810   MIRBuilder.buildStore(NewList, ListPtr,
811                         *MF.getMachineMemOperand(MachinePointerInfo(),
812                                                  MachineMemOperand::MOStore,
813                                                  PtrSize, PtrAlign));
814 
815   MI.eraseFromParent();
816   return true;
817 }
818