xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AVR uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AVRISelLowering.h"
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
23 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/Support/ErrorHandling.h"
26 
27 #include "AVR.h"
28 #include "AVRMachineFunctionInfo.h"
29 #include "AVRSubtarget.h"
30 #include "AVRTargetMachine.h"
31 #include "MCTargetDesc/AVRMCTargetDesc.h"
32 
33 namespace llvm {
34 
AVRTargetLowering(const AVRTargetMachine & TM,const AVRSubtarget & STI)35 AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
36                                      const AVRSubtarget &STI)
37     : TargetLowering(TM), Subtarget(STI) {
38   // Set up the register classes.
39   addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
40   addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
41 
42   // Compute derived properties from the register classes.
43   computeRegisterProperties(Subtarget.getRegisterInfo());
44 
45   setBooleanContents(ZeroOrOneBooleanContent);
46   setBooleanVectorContents(ZeroOrOneBooleanContent);
47   setSchedulingPreference(Sched::RegPressure);
48   setStackPointerRegisterToSaveRestore(AVR::SP);
49   setSupportsUnalignedAtomics(true);
50 
51   setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
52   setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
53 
54   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
55   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
56   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
57   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
58 
59   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
60 
61   for (MVT VT : MVT::integer_valuetypes()) {
62     for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
63       setLoadExtAction(N, VT, MVT::i1, Promote);
64       setLoadExtAction(N, VT, MVT::i8, Expand);
65     }
66   }
67 
68   setTruncStoreAction(MVT::i16, MVT::i8, Expand);
69 
70   for (MVT VT : MVT::integer_valuetypes()) {
71     setOperationAction(ISD::ADDC, VT, Legal);
72     setOperationAction(ISD::SUBC, VT, Legal);
73     setOperationAction(ISD::ADDE, VT, Legal);
74     setOperationAction(ISD::SUBE, VT, Legal);
75   }
76 
77   // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
78   // revert into a sub since we don't have an add with immediate instruction.
79   setOperationAction(ISD::ADD, MVT::i32, Custom);
80   setOperationAction(ISD::ADD, MVT::i64, Custom);
81 
82   // our shift instructions are only able to shift 1 bit at a time, so handle
83   // this in a custom way.
84   setOperationAction(ISD::SRA, MVT::i8, Custom);
85   setOperationAction(ISD::SHL, MVT::i8, Custom);
86   setOperationAction(ISD::SRL, MVT::i8, Custom);
87   setOperationAction(ISD::SRA, MVT::i16, Custom);
88   setOperationAction(ISD::SHL, MVT::i16, Custom);
89   setOperationAction(ISD::SRL, MVT::i16, Custom);
90   setOperationAction(ISD::SRA, MVT::i32, Custom);
91   setOperationAction(ISD::SHL, MVT::i32, Custom);
92   setOperationAction(ISD::SRL, MVT::i32, Custom);
93   setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
94   setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
95   setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
96 
97   setOperationAction(ISD::ROTL, MVT::i8, Custom);
98   setOperationAction(ISD::ROTL, MVT::i16, Expand);
99   setOperationAction(ISD::ROTR, MVT::i8, Custom);
100   setOperationAction(ISD::ROTR, MVT::i16, Expand);
101 
102   setOperationAction(ISD::BR_CC, MVT::i8, Custom);
103   setOperationAction(ISD::BR_CC, MVT::i16, Custom);
104   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
105   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
106   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
107 
108   setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
109   setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
110   setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
111   setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
112   setOperationAction(ISD::SETCC, MVT::i8, Custom);
113   setOperationAction(ISD::SETCC, MVT::i16, Custom);
114   setOperationAction(ISD::SETCC, MVT::i32, Custom);
115   setOperationAction(ISD::SETCC, MVT::i64, Custom);
116   setOperationAction(ISD::SELECT, MVT::i8, Expand);
117   setOperationAction(ISD::SELECT, MVT::i16, Expand);
118 
119   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
120 
121   // Add support for postincrement and predecrement load/stores.
122   setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
123   setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
124   setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal);
125   setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal);
126   setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
127   setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
128   setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal);
129   setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal);
130 
131   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
132 
133   setOperationAction(ISD::VASTART, MVT::Other, Custom);
134   setOperationAction(ISD::VAEND, MVT::Other, Expand);
135   setOperationAction(ISD::VAARG, MVT::Other, Expand);
136   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
137 
138   // Atomic operations which must be lowered to rtlib calls
139   for (MVT VT : MVT::integer_valuetypes()) {
140     setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
141     setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
142     setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
143     setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
144     setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
145     setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
146     setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
147   }
148 
149   // Division/remainder
150   setOperationAction(ISD::UDIV, MVT::i8, Expand);
151   setOperationAction(ISD::UDIV, MVT::i16, Expand);
152   setOperationAction(ISD::UREM, MVT::i8, Expand);
153   setOperationAction(ISD::UREM, MVT::i16, Expand);
154   setOperationAction(ISD::SDIV, MVT::i8, Expand);
155   setOperationAction(ISD::SDIV, MVT::i16, Expand);
156   setOperationAction(ISD::SREM, MVT::i8, Expand);
157   setOperationAction(ISD::SREM, MVT::i16, Expand);
158 
159   // Make division and modulus custom
160   setOperationAction(ISD::UDIVREM, MVT::i8, Custom);
161   setOperationAction(ISD::UDIVREM, MVT::i16, Custom);
162   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
163   setOperationAction(ISD::SDIVREM, MVT::i8, Custom);
164   setOperationAction(ISD::SDIVREM, MVT::i16, Custom);
165   setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
166 
167   // Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.
168   setOperationAction(ISD::MUL, MVT::i8, Expand);
169   setOperationAction(ISD::MUL, MVT::i16, Expand);
170 
171   // Expand 16 bit multiplications.
172   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
173   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
174 
175   // Expand multiplications to libcalls when there is
176   // no hardware MUL.
177   if (!Subtarget.supportsMultiplication()) {
178     setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
179     setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
180   }
181 
182   for (MVT VT : MVT::integer_valuetypes()) {
183     setOperationAction(ISD::MULHS, VT, Expand);
184     setOperationAction(ISD::MULHU, VT, Expand);
185   }
186 
187   for (MVT VT : MVT::integer_valuetypes()) {
188     setOperationAction(ISD::CTPOP, VT, Expand);
189     setOperationAction(ISD::CTLZ, VT, Expand);
190     setOperationAction(ISD::CTTZ, VT, Expand);
191   }
192 
193   for (MVT VT : MVT::integer_valuetypes()) {
194     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
195     // TODO: The generated code is pretty poor. Investigate using the
196     // same "shift and subtract with carry" trick that we do for
197     // extending 8-bit to 16-bit. This may require infrastructure
198     // improvements in how we treat 16-bit "registers" to be feasible.
199   }
200 
201   setMinFunctionAlignment(Align(2));
202   setMinimumJumpTableEntries(UINT_MAX);
203 }
204 
/// All AVR comparison results are materialized in an 8-bit register, so the
/// SetCC result type is always i8 regardless of the operand value type.
EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                          EVT VT) const {
  assert(!VT.isVector() && "No AVR SetCC type for vectors!");
  return MVT::i8;
}
210 
/// Lower shift and rotate nodes (SHL/SRL/SRA/ROTL/ROTR).
///
/// AVR's shift instructions move only one bit at a time, so multi-bit shifts
/// are built from: dedicated 32-bit shift pseudos (LSLW/LSRW/ASRW), loop
/// pseudos for variable amounts, SWAP-based nibble tricks, byte-granular
/// pseudos (LSLWN/LSRWN/ASRWN/LSLBN/LSRBN/ASRBN), and finally a chain of
/// single-bit shift nodes for whatever amount remains.
SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc8;
  const SDNode *N = Op.getNode();
  EVT VT = Op.getValueType();
  SDLoc dl(N);
  assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&
         "Expected power-of-2 shift amount");

  if (VT.getSizeInBits() == 32) {
    if (!isa<ConstantSDNode>(N->getOperand(1))) {
      // 32-bit shifts are converted to a loop in IR.
      // This should be unreachable.
      report_fatal_error("Expected a constant shift amount!");
    }
    SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
    // Split the 32-bit value into its low and high 16-bit halves.
    SDValue SrcLo =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(0, dl, MVT::i16));
    SDValue SrcHi =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(1, dl, MVT::i16));
    uint64_t ShiftAmount = N->getConstantOperandVal(1);
    if (ShiftAmount == 16) {
      // Special case these two operations because they appear to be used by the
      // generic codegen parts to lower 32-bit numbers.
      // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
      // shift of a part of the 32-bit value?
      switch (Op.getOpcode()) {
      case ISD::SHL: {
        // (x << 16) == pair(lo(x), 0): the low half moves up, zeros fill in.
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
      }
      case ISD::SRL: {
        // (x >> 16) == pair(0, hi(x)): the high half moves down, zeros fill in.
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
      }
      }
    }
    // Emit the matching 32-bit shift pseudo that operates on both halves and
    // recombine its two 16-bit results.
    SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
    unsigned Opc;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Invalid 32-bit shift opcode!");
    case ISD::SHL:
      Opc = AVRISD::LSLW;
      break;
    case ISD::SRL:
      Opc = AVRISD::LSRW;
      break;
    case ISD::SRA:
      Opc = AVRISD::ASRW;
      break;
    }
    SDValue Result = DAG.getNode(Opc, dl, ResTys, SrcLo, SrcHi, Cnt);
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(0),
                       Result.getValue(1));
  }

  // Expand non-constant shifts to loops.
  if (!isa<ConstantSDNode>(N->getOperand(1))) {
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Invalid shift opcode!");
    case ISD::SHL:
      return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::SRL:
      return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::ROTL: {
      // Mask the rotate amount to the value's bit width before looping.
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::ROTR: {
      // Mask the rotate amount to the value's bit width before looping.
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::SRA:
      return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    }
  }

  // Constant shift amount: pick the single-bit node for this opcode; the
  // peepholes below first reduce ShiftAmount where cheaper sequences exist.
  uint64_t ShiftAmount = N->getConstantOperandVal(1);
  SDValue Victim = N->getOperand(0);

  switch (Op.getOpcode()) {
  case ISD::SRA:
    Opc8 = AVRISD::ASR;
    break;
  case ISD::ROTL:
    Opc8 = AVRISD::ROL;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::ROTR:
    Opc8 = AVRISD::ROR;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::SRL:
    Opc8 = AVRISD::LSR;
    break;
  case ISD::SHL:
    Opc8 = AVRISD::LSL;
    break;
  default:
    llvm_unreachable("Invalid shift opcode");
  }

  // Optimize int8/int16 shifts.
  if (VT.getSizeInBits() == 8) {
    if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
      // Optimize LSL when 4 <= ShiftAmount <= 6: SWAP exchanges the nibbles,
      // then mask off what the remaining single-bit shifts would discard.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
      ShiftAmount -= 4;
    } else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&
               ShiftAmount < 7) {
      // Optimize LSR when 4 <= ShiftAmount <= 6 (mirror of the LSL case).
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
      ShiftAmount -= 4;
    } else if (Op.getOpcode() == ISD::SHL && ShiftAmount == 7) {
      // Optimize LSL when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::LSLBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRL && ShiftAmount == 7) {
      // Optimize LSR when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) {
      // Optimize ASR when ShiftAmount == 6.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(6, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {
      // Optimize ASR when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) {
      // Optimize left rotation 3 bits to swap then right rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim);
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) {
      // Optimize right rotation 3 bits to swap then left rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim);
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) {
      // Optimize left rotation 7 bits to right rotation 1 bit.
      Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim);
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) {
      // Optimize right rotation 7 bits to left rotation 1 bit.
      Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim);
      ShiftAmount = 0;
    } else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) &&
               ShiftAmount >= 4) {
      // Optimize left/right rotation with the SWAP instruction.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      ShiftAmount -= 4;
    }
  } else if (VT.getSizeInBits() == 16) {
    if (Op.getOpcode() == ISD::SRA)
      // Special optimization for int16 arithmetic right shift.
      switch (ShiftAmount) {
      case 15:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(15, dl, VT));
        ShiftAmount = 0;
        break;
      case 14:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(14, dl, VT));
        ShiftAmount = 0;
        break;
      case 7:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(7, dl, VT));
        ShiftAmount = 0;
        break;
      default:
        break;
      }
    if (4 <= ShiftAmount && ShiftAmount < 8)
      // Pre-shift by a whole nibble, then finish bit-by-bit.
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      default:
        break;
      }
    else if (8 <= ShiftAmount && ShiftAmount < 12)
      // Pre-shift by a whole byte, then finish bit-by-bit on one byte only.
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
    else if (12 <= ShiftAmount)
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        // Note: only an 8-bit ASRWN pre-shift is emitted here (not 12); the
        // remaining bits are handled by the single-bit ASRLO steps below.
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
  }

  // Emit the remaining amount as a chain of single-bit shift/rotate nodes.
  while (ShiftAmount--) {
    Victim = DAG.getNode(Opc8, dl, VT, Victim);
  }

  return Victim;
}
481 
LowerDivRem(SDValue Op,SelectionDAG & DAG) const482 SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
483   unsigned Opcode = Op->getOpcode();
484   assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
485          "Invalid opcode for Div/Rem lowering");
486   bool IsSigned = (Opcode == ISD::SDIVREM);
487   EVT VT = Op->getValueType(0);
488   Type *Ty = VT.getTypeForEVT(*DAG.getContext());
489 
490   RTLIB::Libcall LC;
491   switch (VT.getSimpleVT().SimpleTy) {
492   default:
493     llvm_unreachable("Unexpected request for libcall!");
494   case MVT::i8:
495     LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
496     break;
497   case MVT::i16:
498     LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
499     break;
500   case MVT::i32:
501     LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
502     break;
503   }
504 
505   SDValue InChain = DAG.getEntryNode();
506 
507   TargetLowering::ArgListTy Args;
508   TargetLowering::ArgListEntry Entry;
509   for (SDValue const &Value : Op->op_values()) {
510     Entry.Node = Value;
511     Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
512     Entry.IsSExt = IsSigned;
513     Entry.IsZExt = !IsSigned;
514     Args.push_back(Entry);
515   }
516 
517   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
518                                          getPointerTy(DAG.getDataLayout()));
519 
520   Type *RetTy = (Type *)StructType::get(Ty, Ty);
521 
522   SDLoc dl(Op);
523   TargetLowering::CallLoweringInfo CLI(DAG);
524   CLI.setDebugLoc(dl)
525       .setChain(InChain)
526       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
527       .setInRegister()
528       .setSExtResult(IsSigned)
529       .setZExtResult(!IsSigned);
530 
531   std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
532   return CallInfo.first;
533 }
534 
LowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const535 SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,
536                                               SelectionDAG &DAG) const {
537   auto DL = DAG.getDataLayout();
538 
539   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
540   int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
541 
542   // Create the TargetGlobalAddress node, folding in the constant offset.
543   SDValue Result =
544       DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset);
545   return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
546 }
547 
LowerBlockAddress(SDValue Op,SelectionDAG & DAG) const548 SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,
549                                              SelectionDAG &DAG) const {
550   auto DL = DAG.getDataLayout();
551   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
552 
553   SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL));
554 
555   return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
556 }
557 
558 /// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.
intCCToAVRCC(ISD::CondCode CC)559 static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
560   switch (CC) {
561   default:
562     llvm_unreachable("Unknown condition code!");
563   case ISD::SETEQ:
564     return AVRCC::COND_EQ;
565   case ISD::SETNE:
566     return AVRCC::COND_NE;
567   case ISD::SETGE:
568     return AVRCC::COND_GE;
569   case ISD::SETLT:
570     return AVRCC::COND_LT;
571   case ISD::SETUGE:
572     return AVRCC::COND_SH;
573   case ISD::SETULT:
574     return AVRCC::COND_LO;
575   }
576 }
577 
578 /// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.
getAVRCmp(SDValue LHS,SDValue RHS,SelectionDAG & DAG,SDLoc DL) const579 SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
580                                      SelectionDAG &DAG, SDLoc DL) const {
581   assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
582          "LHS and RHS have different types");
583   assert(((LHS.getSimpleValueType() == MVT::i16) ||
584           (LHS.getSimpleValueType() == MVT::i8)) &&
585          "invalid comparison type");
586 
587   SDValue Cmp;
588 
589   if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {
590     uint64_t Imm = RHS->getAsZExtVal();
591     // Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero
592     // register for the constant RHS if its lower or higher byte is zero.
593     SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
594                                 DAG.getIntPtrConstant(0, DL));
595     SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
596                                 DAG.getIntPtrConstant(1, DL));
597     SDValue RHSlo = (Imm & 0xff) == 0
598                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
599                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
600                                       DAG.getIntPtrConstant(0, DL));
601     SDValue RHShi = (Imm & 0xff00) == 0
602                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
603                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
604                                       DAG.getIntPtrConstant(1, DL));
605     Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
606     Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
607   } else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {
608     // Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero
609     // register for the constant LHS if its lower or higher byte is zero.
610     uint64_t Imm = LHS->getAsZExtVal();
611     SDValue LHSlo = (Imm & 0xff) == 0
612                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
613                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
614                                       DAG.getIntPtrConstant(0, DL));
615     SDValue LHShi = (Imm & 0xff00) == 0
616                         ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
617                         : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
618                                       DAG.getIntPtrConstant(1, DL));
619     SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
620                                 DAG.getIntPtrConstant(0, DL));
621     SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
622                                 DAG.getIntPtrConstant(1, DL));
623     Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
624     Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
625   } else {
626     // Generate ordinary 16-bit comparison.
627     Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
628   }
629 
630   return Cmp;
631 }
632 
633 /// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
634 /// the given operands.
getAVRCmp(SDValue LHS,SDValue RHS,ISD::CondCode CC,SDValue & AVRcc,SelectionDAG & DAG,SDLoc DL) const635 SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
636                                      SDValue &AVRcc, SelectionDAG &DAG,
637                                      SDLoc DL) const {
638   SDValue Cmp;
639   EVT VT = LHS.getValueType();
640   bool UseTest = false;
641 
642   switch (CC) {
643   default:
644     break;
645   case ISD::SETLE: {
646     // Swap operands and reverse the branching condition.
647     std::swap(LHS, RHS);
648     CC = ISD::SETGE;
649     break;
650   }
651   case ISD::SETGT: {
652     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
653       switch (C->getSExtValue()) {
654       case -1: {
655         // When doing lhs > -1 use a tst instruction on the top part of lhs
656         // and use brpl instead of using a chain of cp/cpc.
657         UseTest = true;
658         AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);
659         break;
660       }
661       case 0: {
662         // Turn lhs > 0 into 0 < lhs since 0 can be materialized with
663         // __zero_reg__ in lhs.
664         RHS = LHS;
665         LHS = DAG.getConstant(0, DL, VT);
666         CC = ISD::SETLT;
667         break;
668       }
669       default: {
670         // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows
671         // us to  fold the constant into the cmp instruction.
672         RHS = DAG.getSignedConstant(C->getSExtValue() + 1, DL, VT);
673         CC = ISD::SETGE;
674         break;
675       }
676       }
677       break;
678     }
679     // Swap operands and reverse the branching condition.
680     std::swap(LHS, RHS);
681     CC = ISD::SETLT;
682     break;
683   }
684   case ISD::SETLT: {
685     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
686       switch (C->getSExtValue()) {
687       case 1: {
688         // Turn lhs < 1 into 0 >= lhs since 0 can be materialized with
689         // __zero_reg__ in lhs.
690         RHS = LHS;
691         LHS = DAG.getConstant(0, DL, VT);
692         CC = ISD::SETGE;
693         break;
694       }
695       case 0: {
696         // When doing lhs < 0 use a tst instruction on the top part of lhs
697         // and use brmi instead of using a chain of cp/cpc.
698         UseTest = true;
699         AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);
700         break;
701       }
702       }
703     }
704     break;
705   }
706   case ISD::SETULE: {
707     // Swap operands and reverse the branching condition.
708     std::swap(LHS, RHS);
709     CC = ISD::SETUGE;
710     break;
711   }
712   case ISD::SETUGT: {
713     // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to
714     // fold the constant into the cmp instruction.
715     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
716       // Doing a "icmp ugt i16 65535, %0" comparison should have been converted
717       // already to something else. Assert to make sure this assumption holds.
718       assert((!C->isAllOnes()) && "integer overflow in comparison transform");
719       RHS = DAG.getConstant(C->getZExtValue() + 1, DL, VT);
720       CC = ISD::SETUGE;
721       break;
722     }
723     // Swap operands and reverse the branching condition.
724     std::swap(LHS, RHS);
725     CC = ISD::SETULT;
726     break;
727   }
728   }
729 
730   // Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of
731   // using the default and/or/xor expansion code which is much longer.
732   if (VT == MVT::i32) {
733     SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
734                                 DAG.getIntPtrConstant(0, DL));
735     SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
736                                 DAG.getIntPtrConstant(1, DL));
737     SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
738                                 DAG.getIntPtrConstant(0, DL));
739     SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
740                                 DAG.getIntPtrConstant(1, DL));
741 
742     if (UseTest) {
743       // When using tst we only care about the highest part.
744       SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,
745                                 DAG.getIntPtrConstant(1, DL));
746       Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
747     } else {
748       Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);
749       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
750     }
751   } else if (VT == MVT::i64) {
752     SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
753                                 DAG.getIntPtrConstant(0, DL));
754     SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
755                                 DAG.getIntPtrConstant(1, DL));
756 
757     SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
758                                DAG.getIntPtrConstant(0, DL));
759     SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
760                                DAG.getIntPtrConstant(1, DL));
761     SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
762                                DAG.getIntPtrConstant(0, DL));
763     SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
764                                DAG.getIntPtrConstant(1, DL));
765 
766     SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
767                                 DAG.getIntPtrConstant(0, DL));
768     SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
769                                 DAG.getIntPtrConstant(1, DL));
770 
771     SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
772                                DAG.getIntPtrConstant(0, DL));
773     SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
774                                DAG.getIntPtrConstant(1, DL));
775     SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
776                                DAG.getIntPtrConstant(0, DL));
777     SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
778                                DAG.getIntPtrConstant(1, DL));
779 
780     if (UseTest) {
781       // When using tst we only care about the highest part.
782       SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,
783                                 DAG.getIntPtrConstant(1, DL));
784       Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
785     } else {
786       Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);
787       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
788       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
789       Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
790     }
791   } else if (VT == MVT::i8 || VT == MVT::i16) {
792     if (UseTest) {
793       // When using tst we only care about the highest part.
794       Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,
795                         (VT == MVT::i8)
796                             ? LHS
797                             : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
798                                           LHS, DAG.getIntPtrConstant(1, DL)));
799     } else {
800       Cmp = getAVRCmp(LHS, RHS, DAG, DL);
801     }
802   } else {
803     llvm_unreachable("Invalid comparison size");
804   }
805 
806   // When using a test instruction AVRcc is already set.
807   if (!UseTest) {
808     AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);
809   }
810 
811   return Cmp;
812 }
813 
LowerBR_CC(SDValue Op,SelectionDAG & DAG) const814 SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
815   SDValue Chain = Op.getOperand(0);
816   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
817   SDValue LHS = Op.getOperand(2);
818   SDValue RHS = Op.getOperand(3);
819   SDValue Dest = Op.getOperand(4);
820   SDLoc dl(Op);
821 
822   SDValue TargetCC;
823   SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);
824 
825   return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC,
826                      Cmp);
827 }
828 
LowerSELECT_CC(SDValue Op,SelectionDAG & DAG) const829 SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
830   SDValue LHS = Op.getOperand(0);
831   SDValue RHS = Op.getOperand(1);
832   SDValue TrueV = Op.getOperand(2);
833   SDValue FalseV = Op.getOperand(3);
834   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
835   SDLoc dl(Op);
836 
837   SDValue TargetCC;
838   SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);
839 
840   SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
841 
842   return DAG.getNode(AVRISD::SELECT_CC, dl, Op.getValueType(), Ops);
843 }
844 
LowerSETCC(SDValue Op,SelectionDAG & DAG) const845 SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
846   SDValue LHS = Op.getOperand(0);
847   SDValue RHS = Op.getOperand(1);
848   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
849   SDLoc DL(Op);
850 
851   SDValue TargetCC;
852   SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, DL);
853 
854   SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType());
855   SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType());
856   SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
857 
858   return DAG.getNode(AVRISD::SELECT_CC, DL, Op.getValueType(), Ops);
859 }
860 
LowerVASTART(SDValue Op,SelectionDAG & DAG) const861 SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
862   const MachineFunction &MF = DAG.getMachineFunction();
863   const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
864   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
865   auto DL = DAG.getDataLayout();
866   SDLoc dl(Op);
867 
868   // Vastart just stores the address of the VarArgsFrameIndex slot into the
869   // memory location argument.
870   SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));
871 
872   return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
873                       MachinePointerInfo(SV));
874 }
875 
// Modify the existing ISD::INLINEASM node to add the implicit zero register.
SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);
  // The zero register, if present, sits either last or second-to-last (when a
  // glue operand follows it).
  if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg ||
      Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) {
    // Zero register has already been added. Don't add it again.
    // If this isn't handled, we get called over and over again.
    return Op;
  }

  // Get a list of operands to the new INLINEASM node. This is mostly a copy,
  // with some edits.
  // Add the following operands at the end (but before the glue node, if it's
  // there):
  //  - The flags of the implicit zero register operand.
  //  - The implicit zero register operand itself.
  SDLoc dl(Op);
  SmallVector<SDValue, 8> Ops;
  SDNode *N = Op.getNode();
  SDValue Glue;
  for (unsigned I = 0; I < N->getNumOperands(); I++) {
    SDValue Operand = N->getOperand(I);
    if (Operand.getValueType() == MVT::Glue) {
      // The glue operand always needs to be at the end, so we need to treat it
      // specially.
      Glue = Operand;
    } else {
      Ops.push_back(Operand);
    }
  }
  // Describe the zero register as a plain register-use operand.
  InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, 1);
  Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
  Ops.push_back(ZeroReg);
  if (Glue) {
    Ops.push_back(Glue);
  }

  // Replace the current INLINEASM node with a new one that has the zero
  // register as implicit parameter.
  SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops);
  // Redirect both results (value 0 and the chain/glue value 1) to the new
  // node so nothing refers to the old INLINEASM anymore.
  DAG.ReplaceAllUsesOfValueWith(Op, New);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1));

  return New;
}
921 
LowerOperation(SDValue Op,SelectionDAG & DAG) const922 SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
923   switch (Op.getOpcode()) {
924   default:
925     llvm_unreachable("Don't know how to custom lower this!");
926   case ISD::SHL:
927   case ISD::SRA:
928   case ISD::SRL:
929   case ISD::ROTL:
930   case ISD::ROTR:
931     return LowerShifts(Op, DAG);
932   case ISD::GlobalAddress:
933     return LowerGlobalAddress(Op, DAG);
934   case ISD::BlockAddress:
935     return LowerBlockAddress(Op, DAG);
936   case ISD::BR_CC:
937     return LowerBR_CC(Op, DAG);
938   case ISD::SELECT_CC:
939     return LowerSELECT_CC(Op, DAG);
940   case ISD::SETCC:
941     return LowerSETCC(Op, DAG);
942   case ISD::VASTART:
943     return LowerVASTART(Op, DAG);
944   case ISD::SDIVREM:
945   case ISD::UDIVREM:
946     return LowerDivRem(Op, DAG);
947   case ISD::INLINEASM:
948     return LowerINLINEASM(Op, DAG);
949   }
950 
951   return SDValue();
952 }
953 
/// Replace a node with an illegal result type
/// with a new node built out of custom code.
///
/// For ISD::ADD with a constant RHS the node is rewritten as a subtraction of
/// the negated constant; everything else is funneled through LowerOperation
/// and all of its result values are forwarded.
void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc DL(N);

  switch (N->getOpcode()) {
  case ISD::ADD: {
    // Convert add (x, imm) into sub (x, -imm).
    // NOTE: if the RHS is not a constant, nothing is pushed into Results and
    // the node is left for the generic legalizer to handle.
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      SDValue Sub = DAG.getNode(
          ISD::SUB, DL, N->getValueType(0), N->getOperand(0),
          DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0)));
      Results.push_back(Sub);
    }
    break;
  }
  default: {
    // Fall back to the regular custom-lowering hook and copy out every value
    // the replacement node produces.
    SDValue Res = LowerOperation(SDValue(N, 0), DAG);

    for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
      Results.push_back(Res.getValue(I));

    break;
  }
  }
}
982 
983 /// Return true if the addressing mode represented
984 /// by AM is legal for this target, for a load/store of the specified type.
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const985 bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
986                                               const AddrMode &AM, Type *Ty,
987                                               unsigned AS,
988                                               Instruction *I) const {
989   int64_t Offs = AM.BaseOffs;
990 
991   // Allow absolute addresses.
992   if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) {
993     return true;
994   }
995 
996   // Flash memory instructions only allow zero offsets.
997   if (isa<PointerType>(Ty) && AS == AVR::ProgramMemory) {
998     return false;
999   }
1000 
1001   // Allow reg+<6bit> offset.
1002   if (Offs < 0)
1003     Offs = -Offs;
1004   if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 &&
1005       isUInt<6>(Offs)) {
1006     return true;
1007   }
1008 
1009   return false;
1010 }
1011 
/// Returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
///
/// Only pre-decrement forms are produced (AM is always set to ISD::PRE_DEC):
/// the offset must be exactly -1 for an i8 access and -2 for an i16 access,
/// i.e. a decrement by the access size.
bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  EVT VT;
  const SDNode *Op;
  SDLoc DL(N);

  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Op = LD->getBasePtr().getNode();
    // Extending loads cannot be combined with an indexed address.
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
      return false;
    // Program-memory accesses are excluded from pre-indexing.
    if (AVR::isProgramMemoryAccess(LD)) {
      return false;
    }
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Op = ST->getBasePtr().getNode();
    if (AVR::isProgramMemoryAccess(ST)) {
      return false;
    }
  } else {
    // Neither a load nor a store: nothing to index.
    return false;
  }

  // Only byte- and word-sized accesses can be pre-indexed.
  if (VT != MVT::i8 && VT != MVT::i16) {
    return false;
  }

  // The address must be computed by a simple add/sub of a constant.
  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
    return false;
  }

  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int RHSC = RHS->getSExtValue();
    // Normalize sub (base, c) to an addition of -c.
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    // The decrement must match the access size exactly.
    if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) {
      return false;
    }

    Base = Op->getOperand(0);
    Offset = DAG.getSignedConstant(RHSC, DL, MVT::i8);
    AM = ISD::PRE_DEC;

    return true;
  }

  return false;
}
1067 
/// Returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
///
/// Only post-increment forms are produced (AM is always set to ISD::POST_INC):
/// the increment must be exactly +1 for an i8 access and +2 for an i16 access.
bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  EVT VT;
  SDValue Ptr;
  SDLoc DL(N);

  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
    // Extending loads cannot be combined with an indexed address.
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
      return false;
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
    // We can not store to program memory.
    if (AVR::isProgramMemoryAccess(ST))
      return false;
    // Since the high byte need to be stored first, we can not emit
    // i16 post increment store like:
    // st X+, r24
    // st X+, r25
    if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
      return false;
  } else {
    // Neither a load nor a store: nothing to index.
    return false;
  }

  // Only byte- and word-sized accesses can be post-indexed.
  if (VT != MVT::i8 && VT != MVT::i16) {
    return false;
  }

  // The candidate update must be a simple add/sub of a constant.
  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
    return false;
  }

  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int RHSC = RHS->getSExtValue();
    // Normalize sub (base, c) to an addition of -c.
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -RHSC;
    // The increment must match the access size exactly.
    if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) {
      return false;
    }

    // FIXME: We temporarily disable post increment load from program memory,
    //        due to bug https://github.com/llvm/llvm-project/issues/59914.
    if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
      if (AVR::isProgramMemoryAccess(LD))
        return false;

    Base = Op->getOperand(0);

    // Post-indexing updates the base, so it's not a valid transform
    // if that's not the same as the load's pointer.
    if (Ptr != Base)
      return false;

    Offset = DAG.getConstant(RHSC, DL, MVT::i8);
    AM = ISD::POST_INC;

    return true;
  }

  return false;
}
1138 
/// Global-address + constant-offset expressions can always be folded into a
/// single relocated symbol reference on this target.
bool AVRTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return true;
}
1143 
1144 //===----------------------------------------------------------------------===//
1145 //             Formal Arguments Calling Convention Implementation
1146 //===----------------------------------------------------------------------===//
1147 
1148 #include "AVRGenCallingConv.inc"
1149 
/// Registers for calling conventions, ordered in reverse as required by ABI.
/// Both arrays must be of the same length.
/// Regular AVR: arguments may use R8..R25 (8-bit list) and the overlapping
/// register pairs R9R8..R26R25 (16-bit list).
static const MCPhysReg RegList8AVR[] = {
    AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
    AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
    AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9,  AVR::R8};
// AVRTiny: only R20..R25 are available for arguments.
static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,
                                         AVR::R22, AVR::R21, AVR::R20};
static const MCPhysReg RegList16AVR[] = {
    AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
    AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
    AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
    AVR::R11R10, AVR::R10R9,  AVR::R9R8};
static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,
                                          AVR::R24R23, AVR::R23R22,
                                          AVR::R22R21, AVR::R21R20};

// analyzeArguments/analyzeReturnValues index both lists with the same index,
// so the pairs of arrays must stay in lock-step.
static_assert(std::size(RegList8AVR) == std::size(RegList16AVR),
              "8-bit and 16-bit register arrays must be of equal length");
static_assert(std::size(RegList8Tiny) == std::size(RegList16Tiny),
              "8-bit and 16-bit register arrays must be of equal length");
1171 
1172 /// Analyze incoming and outgoing function arguments. We need custom C++ code
1173 /// to handle special constraints in the ABI.
1174 /// In addition, all pieces of a certain argument have to be passed either
1175 /// using registers or the stack but never mixing both.
1176 template <typename ArgT>
analyzeArguments(TargetLowering::CallLoweringInfo * CLI,const Function * F,const DataLayout * TD,const SmallVectorImpl<ArgT> & Args,SmallVectorImpl<CCValAssign> & ArgLocs,CCState & CCInfo,bool Tiny)1177 static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
1178                              const Function *F, const DataLayout *TD,
1179                              const SmallVectorImpl<ArgT> &Args,
1180                              SmallVectorImpl<CCValAssign> &ArgLocs,
1181                              CCState &CCInfo, bool Tiny) {
1182   // Choose the proper register list for argument passing according to the ABI.
1183   ArrayRef<MCPhysReg> RegList8;
1184   ArrayRef<MCPhysReg> RegList16;
1185   if (Tiny) {
1186     RegList8 = ArrayRef(RegList8Tiny);
1187     RegList16 = ArrayRef(RegList16Tiny);
1188   } else {
1189     RegList8 = ArrayRef(RegList8AVR);
1190     RegList16 = ArrayRef(RegList16AVR);
1191   }
1192 
1193   unsigned NumArgs = Args.size();
1194   // This is the index of the last used register, in RegList*.
1195   // -1 means R26 (R26 is never actually used in CC).
1196   int RegLastIdx = -1;
1197   // Once a value is passed to the stack it will always be used
1198   bool UseStack = false;
1199   for (unsigned i = 0; i != NumArgs;) {
1200     MVT VT = Args[i].VT;
1201     // We have to count the number of bytes for each function argument, that is
1202     // those Args with the same OrigArgIndex. This is important in case the
1203     // function takes an aggregate type.
1204     // Current argument will be between [i..j).
1205     unsigned ArgIndex = Args[i].OrigArgIndex;
1206     unsigned TotalBytes = VT.getStoreSize();
1207     unsigned j = i + 1;
1208     for (; j != NumArgs; ++j) {
1209       if (Args[j].OrigArgIndex != ArgIndex)
1210         break;
1211       TotalBytes += Args[j].VT.getStoreSize();
1212     }
1213     // Round up to even number of bytes.
1214     TotalBytes = alignTo(TotalBytes, 2);
1215     // Skip zero sized arguments
1216     if (TotalBytes == 0)
1217       continue;
1218     // The index of the first register to be used
1219     unsigned RegIdx = RegLastIdx + TotalBytes;
1220     RegLastIdx = RegIdx;
1221     // If there are not enough registers, use the stack
1222     if (RegIdx >= RegList8.size()) {
1223       UseStack = true;
1224     }
1225     for (; i != j; ++i) {
1226       MVT VT = Args[i].VT;
1227 
1228       if (UseStack) {
1229         auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext());
1230         unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt),
1231                                                TD->getABITypeAlign(evt));
1232         CCInfo.addLoc(
1233             CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full));
1234       } else {
1235         unsigned Reg;
1236         if (VT == MVT::i8) {
1237           Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
1238         } else if (VT == MVT::i16) {
1239           Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
1240         } else {
1241           llvm_unreachable(
1242               "calling convention can only manage i8 and i16 types");
1243         }
1244         assert(Reg && "register not available in calling convention");
1245         CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
1246         // Registers inside a particular argument are sorted in increasing order
1247         // (remember the array is reversed).
1248         RegIdx -= VT.getStoreSize();
1249       }
1250     }
1251   }
1252 }
1253 
1254 /// Count the total number of bytes needed to pass or return these arguments.
1255 template <typename ArgT>
1256 static unsigned
getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> & Args)1257 getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
1258   unsigned TotalBytes = 0;
1259 
1260   for (const ArgT &Arg : Args) {
1261     TotalBytes += Arg.VT.getStoreSize();
1262   }
1263   return TotalBytes;
1264 }
1265 
/// Analyze incoming and outgoing value of returning from a function.
/// The algorithm is similar to analyzeArguments, but there can only be
/// one value, possibly an aggregate, and it is limited to 8 bytes.
///
/// \param Args the legalized pieces of the single return value.
/// \param CCInfo calling-convention state that performs the allocations.
/// \param Tiny true selects the reduced AVRTiny register lists.
template <typename ArgT>
static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
                                CCState &CCInfo, bool Tiny) {
  unsigned NumArgs = Args.size();
  unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
  // CanLowerReturn() guarantees this assertion.
  if (Tiny)
    assert(TotalBytes <= 4 &&
           "return values greater than 4 bytes cannot be lowered on AVRTiny");
  else
    assert(TotalBytes <= 8 &&
           "return values greater than 8 bytes cannot be lowered on AVR");

  // Choose the proper register list for argument passing according to the ABI.
  ArrayRef<MCPhysReg> RegList8;
  ArrayRef<MCPhysReg> RegList16;
  if (Tiny) {
    RegList8 = ArrayRef(RegList8Tiny);
    RegList16 = ArrayRef(RegList16Tiny);
  } else {
    RegList8 = ArrayRef(RegList8AVR);
    RegList16 = ArrayRef(RegList16AVR);
  }

  // GCC-ABI says that the size is rounded up to the next even number,
  // but actually once it is more than 4 it will always round up to 8.
  if (TotalBytes > 4) {
    TotalBytes = 8;
  } else {
    TotalBytes = alignTo(TotalBytes, 2);
  }

  // The index of the first register to use.
  // The register lists are reversed, so a larger rounded size starts the
  // assignment deeper into the list (i.e. at a lower-numbered register).
  int RegIdx = TotalBytes - 1;
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT VT = Args[i].VT;
    unsigned Reg;
    if (VT == MVT::i8) {
      Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
    } else if (VT == MVT::i16) {
      Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
    } else {
      llvm_unreachable("calling convention can only manage i8 and i16 types");
    }
    assert(Reg && "register not available in calling convention");
    CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
    // Registers sort in increasing order
    RegIdx -= VT.getStoreSize();
  }
}
1319 
/// Lower the incoming (formal) arguments of a function: assign every argument
/// piece to a register or a fixed stack object, emit the corresponding
/// CopyFromReg / load nodes into InVals, and record the vararg frame index
/// for later expansion of llvm.va_start.
SDValue AVRTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto DL = DAG.getDataLayout();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Variadic functions do not need all the analysis below.
  if (isVarArg) {
    CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
  } else {
    analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,
                     Subtarget.hasTinyEncoding());
  }

  SDValue ArgValue;
  for (CCValAssign &VA : ArgLocs) {

    // Arguments stored on registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC;
      if (RegVT == MVT::i8) {
        RC = &AVR::GPR8RegClass;
      } else if (RegVT == MVT::i16) {
        RC = &AVR::DREGSRegClass;
      } else {
        llvm_unreachable("Unknown argument type!");
      }

      Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
      ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);

      // :NOTE: Clang should not promote any i8 into i16 but for safety the
      // following code will handle zexts or sexts generated by other
      // front ends. Otherwise:
      // If this is an 8 bit value, it is really passed promoted
      // to 16 bits. Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);
    } else {
      // Only arguments passed on the stack should make it here.
      assert(VA.isMemLoc());

      EVT LocVT = VA.getLocVT();

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
      InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
                                   MachinePointerInfo::getFixedStack(MF, FI)));
    }
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned StackSize = CCInfo.getStackSize();
    AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();

    // Slot is 2 bytes wide: the address just past the named arguments.
    AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));
  }

  return Chain;
}
1415 
1416 //===----------------------------------------------------------------------===//
1417 //                  Call Calling Convention Implementation
1418 //===----------------------------------------------------------------------===//
1419 
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const1420 SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1421                                      SmallVectorImpl<SDValue> &InVals) const {
1422   SelectionDAG &DAG = CLI.DAG;
1423   SDLoc &DL = CLI.DL;
1424   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1425   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1426   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1427   SDValue Chain = CLI.Chain;
1428   SDValue Callee = CLI.Callee;
1429   bool &isTailCall = CLI.IsTailCall;
1430   CallingConv::ID CallConv = CLI.CallConv;
1431   bool isVarArg = CLI.IsVarArg;
1432 
1433   MachineFunction &MF = DAG.getMachineFunction();
1434 
1435   // AVR does not yet support tail call optimization.
1436   isTailCall = false;
1437 
1438   // Analyze operands of the call, assigning locations to each operand.
1439   SmallVector<CCValAssign, 16> ArgLocs;
1440   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1441                  *DAG.getContext());
1442 
1443   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1444   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1445   // node so that legalize doesn't hack it.
1446   const Function *F = nullptr;
1447   if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1448     const GlobalValue *GV = G->getGlobal();
1449     if (isa<Function>(GV))
1450       F = cast<Function>(GV);
1451     Callee =
1452         DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout()));
1453   } else if (const ExternalSymbolSDNode *ES =
1454                  dyn_cast<ExternalSymbolSDNode>(Callee)) {
1455     Callee = DAG.getTargetExternalSymbol(ES->getSymbol(),
1456                                          getPointerTy(DAG.getDataLayout()));
1457   }
1458 
1459   // Variadic functions do not need all the analysis below.
1460   if (isVarArg) {
1461     CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
1462   } else {
1463     analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,
1464                      Subtarget.hasTinyEncoding());
1465   }
1466 
1467   // Get a count of how many bytes are to be pushed on the stack.
1468   unsigned NumBytes = CCInfo.getStackSize();
1469 
1470   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1471 
1472   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
1473 
1474   // First, walk the register assignments, inserting copies.
1475   unsigned AI, AE;
1476   bool HasStackArgs = false;
1477   for (AI = 0, AE = ArgLocs.size(); AI != AE; ++AI) {
1478     CCValAssign &VA = ArgLocs[AI];
1479     EVT RegVT = VA.getLocVT();
1480     SDValue Arg = OutVals[AI];
1481 
1482     // Promote the value if needed. With Clang this should not happen.
1483     switch (VA.getLocInfo()) {
1484     default:
1485       llvm_unreachable("Unknown loc info!");
1486     case CCValAssign::Full:
1487       break;
1488     case CCValAssign::SExt:
1489       Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg);
1490       break;
1491     case CCValAssign::ZExt:
1492       Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg);
1493       break;
1494     case CCValAssign::AExt:
1495       Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg);
1496       break;
1497     case CCValAssign::BCvt:
1498       Arg = DAG.getNode(ISD::BITCAST, DL, RegVT, Arg);
1499       break;
1500     }
1501 
1502     // Stop when we encounter a stack argument, we need to process them
1503     // in reverse order in the loop below.
1504     if (VA.isMemLoc()) {
1505       HasStackArgs = true;
1506       break;
1507     }
1508 
1509     // Arguments that can be passed on registers must be kept in the RegsToPass
1510     // vector.
1511     RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1512   }
1513 
1514   // Second, stack arguments have to be walked.
1515   // Previously this code created chained stores but those chained stores appear
1516   // to be unchained in the legalization phase. Therefore, do not attempt to
1517   // chain them here. In fact, chaining them here somehow causes the first and
1518   // second store to be reversed which is the exact opposite of the intended
1519   // effect.
1520   if (HasStackArgs) {
1521     SmallVector<SDValue, 8> MemOpChains;
1522     for (; AI != AE; AI++) {
1523       CCValAssign &VA = ArgLocs[AI];
1524       SDValue Arg = OutVals[AI];
1525 
1526       assert(VA.isMemLoc());
1527 
1528       // SP points to one stack slot further so add one to adjust it.
1529       SDValue PtrOff = DAG.getNode(
1530           ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
1531           DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())),
1532           DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL));
1533 
1534       MemOpChains.push_back(
1535           DAG.getStore(Chain, DL, Arg, PtrOff,
1536                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
1537     }
1538 
1539     if (!MemOpChains.empty())
1540       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1541   }
1542 
1543   // Build a sequence of copy-to-reg nodes chained together with token chain and
1544   // flag operands which copy the outgoing args into registers.  The InGlue is
1545   // necessary since all emitted instructions must be stuck together.
1546   SDValue InGlue;
1547   for (auto Reg : RegsToPass) {
1548     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InGlue);
1549     InGlue = Chain.getValue(1);
1550   }
1551 
1552   // Returns a chain & a flag for retval copy to use.
1553   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1554   SmallVector<SDValue, 8> Ops;
1555   Ops.push_back(Chain);
1556   Ops.push_back(Callee);
1557 
1558   // Add argument registers to the end of the list so that they are known live
1559   // into the call.
1560   for (auto Reg : RegsToPass) {
1561     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1562   }
1563 
1564   // The zero register (usually R1) must be passed as an implicit register so
1565   // that this register is correctly zeroed in interrupts.
1566   Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
1567 
1568   // Add a register mask operand representing the call-preserved registers.
1569   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1570   const uint32_t *Mask =
1571       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
1572   assert(Mask && "Missing call preserved mask for calling convention");
1573   Ops.push_back(DAG.getRegisterMask(Mask));
1574 
1575   if (InGlue.getNode()) {
1576     Ops.push_back(InGlue);
1577   }
1578 
1579   Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
1580   InGlue = Chain.getValue(1);
1581 
1582   // Create the CALLSEQ_END node.
1583   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);
1584 
1585   if (!Ins.empty()) {
1586     InGlue = Chain.getValue(1);
1587   }
1588 
1589   // Handle result values, copying them out of physregs into vregs that we
1590   // return.
1591   return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, DL, DAG,
1592                          InVals);
1593 }
1594 
/// Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
SDValue AVRTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Handle runtime calling convs.
  if (CallConv == CallingConv::AVR_BUILTIN) {
    CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
  } else {
    analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());
  }

  // Copy all of the result registers out of their specified physreg.
  // A glued CopyFromReg node produces (value, chain, glue); select the chain
  // (result 1) to keep the copies ordered, thread the glue (result 2) into
  // the next copy, and hand the value itself (result 0) back to the caller.
  for (CCValAssign const &RVLoc : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
                               InGlue)
                .getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}
1626 
1627 //===----------------------------------------------------------------------===//
1628 //               Return Value Calling Convention Implementation
1629 //===----------------------------------------------------------------------===//
1630 
CanLowerReturn(CallingConv::ID CallConv,MachineFunction & MF,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,LLVMContext & Context,const Type * RetTy) const1631 bool AVRTargetLowering::CanLowerReturn(
1632     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
1633     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
1634     const Type *RetTy) const {
1635   if (CallConv == CallingConv::AVR_BUILTIN) {
1636     SmallVector<CCValAssign, 16> RVLocs;
1637     CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
1638     return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);
1639   }
1640 
1641   unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);
1642   return TotalBytes <= (unsigned)(Subtarget.hasTinyEncoding() ? 4 : 8);
1643 }
1644 
/// Lower a function's return: copy the return values into the physical
/// return registers (glued together) and emit a RET_GLUE/RETI_GLUE node.
SDValue
AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze return values.
  if (CallConv == CallingConv::AVR_BUILTIN) {
    CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
  } else {
    analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
  }

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);
  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // Don't emit the ret/reti instruction when the naked attribute is present in
  // the function being compiled.
  if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {
    return Chain;
  }

  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();

  if (!AFI->isInterruptOrSignalHandler()) {
    // The return instruction has an implicit zero register operand: it must
    // contain zero on return.
    // This is not needed in interrupts however, where the zero register is
    // handled specially (only pushed/popped when needed).
    RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
  }

  // Interrupt/signal handlers must return with RETI; everything else uses RET.
  unsigned RetOpc =
      AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_GLUE : AVRISD::RET_GLUE;

  RetOps[0] = Chain; // Update chain.

  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
}
1708 
1709 //===----------------------------------------------------------------------===//
1710 //  Custom Inserters
1711 //===----------------------------------------------------------------------===//
1712 
/// Expand a shift-by-variable-amount pseudo (Lsl8/16, Lsr8/16, Asr8/16,
/// Rol8/16, Ror8/16) into a decrement-and-branch loop that applies the
/// corresponding single-bit shift instruction once per iteration.
/// Returns the block containing the instructions that followed the pseudo.
MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
                                                  MachineBasicBlock *BB,
                                                  bool Tiny) const {
  unsigned Opc;
  const TargetRegisterClass *RC;
  bool HasRepeatedOperand = false;
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RI = F->getRegInfo();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();

  // Map the pseudo to the single-bit shift instruction used in the loop body,
  // together with the register class it operates on.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Invalid shift opcode!");
  case AVR::Lsl8:
    Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
    RC = &AVR::GPR8RegClass;
    HasRepeatedOperand = true;
    break;
  case AVR::Lsl16:
    Opc = AVR::LSLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Asr8:
    Opc = AVR::ASRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Asr16:
    Opc = AVR::ASRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Lsr8:
    Opc = AVR::LSRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Lsr16:
    Opc = AVR::LSRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Rol8:
    // Tiny cores use a variant with R17 as scratch instead of R1.
    Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Rol16:
    Opc = AVR::ROLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Ror8:
    Opc = AVR::RORBRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Ror16:
    Opc = AVR::RORWRd;
    RC = &AVR::DREGSRegClass;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  // Find the insertion point for the new blocks: just after BB. (The scan
  // starts at BB itself so the loop body never runs; the net effect is
  // I = std::next(BB->getIterator()).)
  MachineFunction::iterator I;
  for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)
    ;
  if (I != F->end())
    ++I;

  // Create loop block.
  MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, LoopBB);
  F->insert(I, CheckBB);
  F->insert(I, RemBB);

  // Update machine-CFG edges by transferring all successors of the current
  // block to the block containing instructions after shift.
  RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
                BB->end());
  RemBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add edges BB => LoopBB => CheckBB => RemBB, CheckBB => LoopBB.
  BB->addSuccessor(CheckBB);
  LoopBB->addSuccessor(CheckBB);
  CheckBB->addSuccessor(LoopBB);
  CheckBB->addSuccessor(RemBB);

  Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftReg = RI.createVirtualRegister(RC);
  Register ShiftReg2 = RI.createVirtualRegister(RC);
  Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();

  // BB:
  // rjmp CheckBB
  BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);

  // LoopBB:
  // ShiftReg2 = shift ShiftReg
  auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
  if (HasRepeatedOperand)
    ShiftMI.addReg(ShiftReg); // e.g. LSL-as-ADD needs the operand twice.

  // CheckBB:
  // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
  // ShiftAmt = phi [%N,      BB], [%ShiftAmt2, LoopBB]
  // DestReg  = phi [%SrcReg, BB], [%ShiftReg,  LoopBB]
  // ShiftAmt2 = ShiftAmt - 1;
  // if (ShiftAmt2 >= 0) goto LoopBB;
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
      .addReg(ShiftAmtSrcReg)
      .addMBB(BB)
      .addReg(ShiftAmtReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);

  // Decrement the remaining amount and loop while it is still non-negative
  // (BRPL branches when the N flag is clear).
  BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
  BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return RemBB;
}
1845 
// Do a multibyte AVR shift. Insert shift instructions and put the output
// registers in the Regs array.
// Because AVR does not have a normal shift instruction (only a single bit shift
// instruction), we have to emulate this behavior with other instructions.
// It first tries large steps (moving registers around) and then smaller steps
// like single bit shifts.
// Large shifts actually reduce the number of shifted registers, so the below
// algorithms have to work independently of the number of registers that are
// shifted.
// For more information and background, see this blogpost:
// https://aykevl.nl/2021/02/avr-bitshift
//
// Regs holds (register, subregister-index) pairs, most significant byte first;
// the entries are rewritten in place to name the shifted result.
static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
                                 MutableArrayRef<std::pair<Register, int>> Regs,
                                 ISD::NodeType Opc, int64_t ShiftAmt) {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const DebugLoc &dl = MI.getDebugLoc();

  const bool ShiftLeft = Opc == ISD::SHL;
  const bool ArithmeticShift = Opc == ISD::SRA;

  // Zero a register, for use in later operations.
  Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
  BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
      .addReg(STI.getZeroRegister());

  // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
  // and is hard to compose with the rest, so these are special cased.
  // The basic idea is to shift one or two bits in the opposite direction and
  // then move registers around to get the correct end result.
  if (ShiftLeft && (ShiftAmt % 8) >= 6) {
    // Left shift modulo 6 or 7.

    // Create a slice of the registers we're going to modify, to ease working
    // with them.
    size_t ShiftRegsOffset = ShiftAmt / 8;
    size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
        Regs.slice(ShiftRegsOffset, ShiftRegsSize);

    // Shift one to the right, keeping the least significant bit as the carry
    // bit.
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);

    // Rotate the least significant bit from the carry bit into a new register
    // (that starts out zero).
    Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
    BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);

    // Shift one more to the right if this is a modulo-6 shift.
    if (ShiftAmt % 8 == 6) {
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
      Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
      LowByte = NewLowByte;
    }

    // Move all registers to the left, zeroing the bottom registers as needed.
    for (size_t I = 0; I < Regs.size(); I++) {
      int ShiftRegsIdx = I + 1;
      if (ShiftRegsIdx < (int)ShiftRegs.size()) {
        Regs[I] = ShiftRegs[ShiftRegsIdx];
      } else if (ShiftRegsIdx == (int)ShiftRegs.size()) {
        Regs[I] = std::pair(LowByte, 0);
      } else {
        Regs[I] = std::pair(ZeroReg, 0);
      }
    }

    return;
  }

  // Right shift modulo 6 or 7.
  if (!ShiftLeft && (ShiftAmt % 8) >= 6) {
    // Create a view on the registers we're going to modify, to ease working
    // with them.
    size_t ShiftRegsSize = Regs.size() - (ShiftAmt / 8);
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
        Regs.slice(0, ShiftRegsSize);

    // Shift one to the left.
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);

    // Sign or zero extend the most significant register into a new register.
    // The HighByte is the byte that still has one (or two) bits from the
    // original value. The ExtByte is purely a zero/sign extend byte (all bits
    // are either 0 or 1).
    Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
    Register ExtByte = 0;
    if (ArithmeticShift) {
      // Sign-extend bit that was shifted out last.
      // SBC Rd, Rd yields 0x00 or 0xff depending on the carry flag.
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
          .addReg(HighByte, RegState::Undef)
          .addReg(HighByte, RegState::Undef);
      ExtByte = HighByte;
      // The highest bit of the original value is the same as the zero-extend
      // byte, so HighByte and ExtByte are the same.
    } else {
      // Use the zero register for zero extending.
      ExtByte = ZeroReg;
      // Rotate most significant bit into a new register (that starts out zero).
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
          .addReg(ExtByte)
          .addReg(ExtByte);
    }

    // Shift one more to the left for modulo 6 shifts.
    if (ShiftAmt % 8 == 6) {
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
      // Shift the topmost bit into the HighByte.
      Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
          .addReg(HighByte)
          .addReg(HighByte);
      HighByte = NewExt;
    }

    // Move all to the right, while sign or zero extending.
    for (int I = Regs.size() - 1; I >= 0; I--) {
      int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - 1;
      if (ShiftRegsIdx >= 0) {
        Regs[I] = ShiftRegs[ShiftRegsIdx];
      } else if (ShiftRegsIdx == -1) {
        Regs[I] = std::pair(HighByte, 0);
      } else {
        Regs[I] = std::pair(ExtByte, 0);
      }
    }

    return;
  }

  // For shift amounts of at least one register, simply rename the registers and
  // zero the bottom registers.
  while (ShiftLeft && ShiftAmt >= 8) {
    // Move all registers one to the left.
    for (size_t I = 0; I < Regs.size() - 1; I++) {
      Regs[I] = Regs[I + 1];
    }

    // Zero the least significant register.
    Regs[Regs.size() - 1] = std::pair(ZeroReg, 0);

    // Continue shifts with the leftover registers.
    Regs = Regs.drop_back(1);

    ShiftAmt -= 8;
  }

  // And again, the same for right shifts.
  Register ShrExtendReg = 0;
  if (!ShiftLeft && ShiftAmt >= 8) {
    if (ArithmeticShift) {
      // Sign extend the most significant register into ShrExtendReg.
      // ADD Rd, Rd moves the sign bit into the carry; SBC Rd, Rd then
      // materializes 0x00 or 0xff from the carry.
      ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)
          .addReg(Regs[0].first, 0, Regs[0].second)
          .addReg(Regs[0].first, 0, Regs[0].second);
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)
          .addReg(Tmp)
          .addReg(Tmp);
    } else {
      ShrExtendReg = ZeroReg;
    }
    for (; ShiftAmt >= 8; ShiftAmt -= 8) {
      // Move all registers one to the right.
      for (size_t I = Regs.size() - 1; I != 0; I--) {
        Regs[I] = Regs[I - 1];
      }

      // Zero or sign extend the most significant register.
      Regs[0] = std::pair(ShrExtendReg, 0);

      // Continue shifts with the leftover registers.
      Regs = Regs.drop_front(1);
    }
  }

  // The bigger shifts are already handled above.
  assert((ShiftAmt < 8) && "Unexpect shift amount");

  // Shift by four bits, using a complicated swap/eor/andi/eor sequence.
  // It only works for logical shifts because the bits shifted in are all
  // zeroes.
  // To shift a single byte right, it produces code like this:
  //   swap r0
  //   andi r0, 0x0f
  // For a two-byte (16-bit) shift, it adds the following instructions to shift
  // the upper byte into the lower byte:
  //   swap r1
  //   eor r0, r1
  //   andi r1, 0x0f
  //   eor r0, r1
  // For bigger shifts, it repeats the above sequence. For example, for a 3-byte
  // (24-bit) shift it adds:
  //   swap r2
  //   eor r1, r2
  //   andi r2, 0x0f
  //   eor r1, r2
  if (!ArithmeticShift && ShiftAmt >= 4) {
    Register Prev = 0;
    for (size_t I = 0; I < Regs.size(); I++) {
      size_t Idx = ShiftLeft ? I : Regs.size() - I - 1;
      Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)
          .addReg(Regs[Idx].first, 0, Regs[Idx].second);
      if (I != 0) {
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
            .addReg(Prev)
            .addReg(SwapReg);
        Prev = R;
      }
      Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)
          .addReg(SwapReg)
          .addImm(ShiftLeft ? 0xf0 : 0x0f);
      if (I != 0) {
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
            .addReg(Prev)
            .addReg(AndReg);
        size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1;
        Regs[PrevIdx] = std::pair(R, 0);
      }
      Prev = AndReg;
      Regs[Idx] = std::pair(AndReg, 0);
    }
    ShiftAmt -= 4;
  }

  // Shift by one. This is the fallback that always works, and the shift
  // operation that is used for 1, 2, and 3 bit shifts.
  while (ShiftLeft && ShiftAmt) {
    // Shift one to the left: start with the most significant byte and ripple
    // the carry down with ADC.
    for (ssize_t I = Regs.size() - 1; I >= 0; I--) {
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register In = Regs[I].first;
      Register InSubreg = Regs[I].second;
      if (I == (ssize_t)Regs.size() - 1) { // first iteration
        BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
            .addReg(In, 0, InSubreg)
            .addReg(In, 0, InSubreg);
      } else {
        BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
            .addReg(In, 0, InSubreg)
            .addReg(In, 0, InSubreg);
      }
      Regs[I] = std::pair(Out, 0);
    }
    ShiftAmt--;
  }
  while (!ShiftLeft && ShiftAmt) {
    // Shift one to the right: start with the most significant byte (ASR/LSR)
    // and ripple the carry through the rest with ROR.
    for (size_t I = 0; I < Regs.size(); I++) {
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register In = Regs[I].first;
      Register InSubreg = Regs[I].second;
      if (I == 0) {
        unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
        BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);
      } else {
        BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);
      }
      Regs[I] = std::pair(Out, 0);
    }
    ShiftAmt--;
  }

  if (ShiftAmt != 0) {
    llvm_unreachable("don't know how to shift!"); // sanity check
  }
}
2121 
2122 // Do a wide (32-bit) shift.
2123 MachineBasicBlock *
insertWideShift(MachineInstr & MI,MachineBasicBlock * BB) const2124 AVRTargetLowering::insertWideShift(MachineInstr &MI,
2125                                    MachineBasicBlock *BB) const {
2126   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2127   const DebugLoc &dl = MI.getDebugLoc();
2128 
2129   // How much to shift to the right (meaning: a negative number indicates a left
2130   // shift).
2131   int64_t ShiftAmt = MI.getOperand(4).getImm();
2132   ISD::NodeType Opc;
2133   switch (MI.getOpcode()) {
2134   case AVR::Lsl32:
2135     Opc = ISD::SHL;
2136     break;
2137   case AVR::Lsr32:
2138     Opc = ISD::SRL;
2139     break;
2140   case AVR::Asr32:
2141     Opc = ISD::SRA;
2142     break;
2143   }
2144 
2145   // Read the input registers, with the most significant register at index 0.
2146   std::array<std::pair<Register, int>, 4> Registers = {
2147       std::pair(MI.getOperand(3).getReg(), AVR::sub_hi),
2148       std::pair(MI.getOperand(3).getReg(), AVR::sub_lo),
2149       std::pair(MI.getOperand(2).getReg(), AVR::sub_hi),
2150       std::pair(MI.getOperand(2).getReg(), AVR::sub_lo),
2151   };
2152 
2153   // Do the shift. The registers are modified in-place.
2154   insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);
2155 
2156   // Combine the 8-bit registers into 16-bit register pairs.
2157   // This done either from LSB to MSB or from MSB to LSB, depending on the
2158   // shift. It's an optimization so that the register allocator will use the
2159   // fewest movs possible (which order we use isn't a correctness issue, just an
2160   // optimization issue).
2161   //   - lsl prefers starting from the most significant byte (2nd case).
2162   //   - lshr prefers starting from the least significant byte (1st case).
2163   //   - for ashr it depends on the number of shifted bytes.
2164   // Some shift operations still don't get the most optimal mov sequences even
2165   // with this distinction. TODO: figure out why and try to fix it (but we're
2166   // already equal to or faster than avr-gcc in all cases except ashr 8).
2167   if (Opc != ISD::SHL &&
2168       (Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) {
2169     // Use the resulting registers starting with the least significant byte.
2170     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
2171         .addReg(Registers[3].first, 0, Registers[3].second)
2172         .addImm(AVR::sub_lo)
2173         .addReg(Registers[2].first, 0, Registers[2].second)
2174         .addImm(AVR::sub_hi);
2175     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
2176         .addReg(Registers[1].first, 0, Registers[1].second)
2177         .addImm(AVR::sub_lo)
2178         .addReg(Registers[0].first, 0, Registers[0].second)
2179         .addImm(AVR::sub_hi);
2180   } else {
2181     // Use the resulting registers starting with the most significant byte.
2182     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
2183         .addReg(Registers[0].first, 0, Registers[0].second)
2184         .addImm(AVR::sub_hi)
2185         .addReg(Registers[1].first, 0, Registers[1].second)
2186         .addImm(AVR::sub_lo);
2187     BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
2188         .addReg(Registers[2].first, 0, Registers[2].second)
2189         .addImm(AVR::sub_hi)
2190         .addReg(Registers[3].first, 0, Registers[3].second)
2191         .addImm(AVR::sub_lo);
2192   }
2193 
2194   // Remove the pseudo instruction.
2195   MI.eraseFromParent();
2196   return BB;
2197 }
2198 
isCopyMulResult(MachineBasicBlock::iterator const & I)2199 static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
2200   if (I->getOpcode() == AVR::COPY) {
2201     Register SrcReg = I->getOperand(1).getReg();
2202     return (SrcReg == AVR::R0 || SrcReg == AVR::R1);
2203   }
2204 
2205   return false;
2206 }
2207 
2208 // The mul instructions wreak havoc on our zero_reg R1. We need to clear it
2209 // after the result has been evacuated. This is probably not the best way to do
2210 // it, but it works for now.
MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  ++I; // in any case insert *after* the mul instruction
  // Skip over up to two COPYs that evacuate the product out of R1:R0, so the
  // clearing EOR lands only after the result has been read.
  if (isCopyMulResult(I))
    ++I;
  if (isCopyMulResult(I))
    ++I;
  // Restore the zero register: EOR R1, R1 (MUL left the product's high byte
  // in R1, which the rest of the code assumes is always zero).
  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)
      .addReg(AVR::R1)
      .addReg(AVR::R1);
  return BB;
}
2225 
2226 // Insert a read from the zero register.
2227 MachineBasicBlock *
insertCopyZero(MachineInstr & MI,MachineBasicBlock * BB) const2228 AVRTargetLowering::insertCopyZero(MachineInstr &MI,
2229                                   MachineBasicBlock *BB) const {
2230   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2231   MachineBasicBlock::iterator I(MI);
2232   BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
2233       .add(MI.getOperand(0))
2234       .addReg(Subtarget.getZeroRegister());
2235   MI.eraseFromParent();
2236   return BB;
2237 }
2238 
// Lower atomicrmw operation to disable interrupts, do operation, and restore
// interrupts. This works because all AVR microcontrollers are single core.
//
// \p Opcode is the machine instruction performing the arithmetic (selected by
// the caller), and \p Width is the operand width in bits (8 or 16).
MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  DebugLoc dl = MI.getDebugLoc();

  // Example instruction sequence, for an atomic 8-bit add:
  //   ldi r25, 5
  //   in r0, SREG
  //   cli
  //   ld r24, X
  //   add r25, r24
  //   st X, r25
  //   out SREG, r0

  const TargetRegisterClass *RC =
      (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
  unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
  unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;

  // Disable interrupts: save SREG in the scratch register, then clear the
  // global interrupt enable flag (SREG bit 7, i.e. "cli").
  BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())
      .addImm(Subtarget.getIORegSREG());
  BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);

  // Load the original value.
  BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())
      .add(MI.getOperand(1));

  // Do the arithmetic operation.
  Register Result = MRI.createVirtualRegister(RC);
  BuildMI(*BB, I, dl, TII.get(Opcode), Result)
      .addReg(MI.getOperand(0).getReg())
      .add(MI.getOperand(2));

  // Store the result.
  BuildMI(*BB, I, dl, TII.get(StoreOpcode))
      .add(MI.getOperand(1))
      .addReg(Result);

  // Restore interrupts by writing the saved SREG back.
  BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
      .addImm(Subtarget.getIORegSREG())
      .addReg(Subtarget.getTmpRegister());

  // Remove the pseudo instruction.
  MI.eraseFromParent();
  return BB;
}
2291 
2292 MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr & MI,MachineBasicBlock * MBB) const2293 AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2294                                                MachineBasicBlock *MBB) const {
2295   int Opc = MI.getOpcode();
2296   const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();
2297 
2298   // Pseudo shift instructions with a non constant shift amount are expanded
2299   // into a loop.
2300   switch (Opc) {
2301   case AVR::Lsl8:
2302   case AVR::Lsl16:
2303   case AVR::Lsr8:
2304   case AVR::Lsr16:
2305   case AVR::Rol8:
2306   case AVR::Rol16:
2307   case AVR::Ror8:
2308   case AVR::Ror16:
2309   case AVR::Asr8:
2310   case AVR::Asr16:
2311     return insertShift(MI, MBB, STI.hasTinyEncoding());
2312   case AVR::Lsl32:
2313   case AVR::Lsr32:
2314   case AVR::Asr32:
2315     return insertWideShift(MI, MBB);
2316   case AVR::MULRdRr:
2317   case AVR::MULSRdRr:
2318     return insertMul(MI, MBB);
2319   case AVR::CopyZero:
2320     return insertCopyZero(MI, MBB);
2321   case AVR::AtomicLoadAdd8:
2322     return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
2323   case AVR::AtomicLoadAdd16:
2324     return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);
2325   case AVR::AtomicLoadSub8:
2326     return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);
2327   case AVR::AtomicLoadSub16:
2328     return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);
2329   case AVR::AtomicLoadAnd8:
2330     return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);
2331   case AVR::AtomicLoadAnd16:
2332     return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);
2333   case AVR::AtomicLoadOr8:
2334     return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);
2335   case AVR::AtomicLoadOr16:
2336     return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);
2337   case AVR::AtomicLoadXor8:
2338     return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);
2339   case AVR::AtomicLoadXor16:
2340     return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);
2341   }
2342 
2343   assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
2344          "Unexpected instr type to insert");
2345 
2346   const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent()
2347                                 ->getParent()
2348                                 ->getSubtarget()
2349                                 .getInstrInfo();
2350   DebugLoc dl = MI.getDebugLoc();
2351 
2352   // To "insert" a SELECT instruction, we insert the diamond
2353   // control-flow pattern. The incoming instruction knows the
2354   // destination vreg to set, the condition code register to branch
2355   // on, the true/false values to select between, and a branch opcode
2356   // to use.
2357 
2358   MachineFunction *MF = MBB->getParent();
2359   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2360   MachineBasicBlock *FallThrough = MBB->getFallThrough();
2361 
2362   // If the current basic block falls through to another basic block,
2363   // we must insert an unconditional branch to the fallthrough destination
2364   // if we are to insert basic blocks at the prior fallthrough point.
2365   if (FallThrough != nullptr) {
2366     BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
2367   }
2368 
2369   MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
2370   MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);
2371 
2372   MachineFunction::iterator I;
2373   for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)
2374     ;
2375   if (I != MF->end())
2376     ++I;
2377   MF->insert(I, trueMBB);
2378   MF->insert(I, falseMBB);
2379 
2380   // Set the call frame size on entry to the new basic blocks.
2381   unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);
2382   trueMBB->setCallFrameSize(CallFrameSize);
2383   falseMBB->setCallFrameSize(CallFrameSize);
2384 
2385   // Transfer remaining instructions and all successors of the current
2386   // block to the block which will contain the Phi node for the
2387   // select.
2388   trueMBB->splice(trueMBB->begin(), MBB,
2389                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2390   trueMBB->transferSuccessorsAndUpdatePHIs(MBB);
2391 
2392   AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm();
2393   BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB);
2394   BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);
2395   MBB->addSuccessor(falseMBB);
2396   MBB->addSuccessor(trueMBB);
2397 
2398   // Unconditionally flow back to the true block
2399   BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);
2400   falseMBB->addSuccessor(trueMBB);
2401 
2402   // Set up the Phi node to determine where we came from
2403   BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
2404           MI.getOperand(0).getReg())
2405       .addReg(MI.getOperand(1).getReg())
2406       .addMBB(MBB)
2407       .addReg(MI.getOperand(2).getReg())
2408       .addMBB(falseMBB);
2409 
2410   MI.eraseFromParent(); // The pseudo instruction is gone now.
2411   return trueMBB;
2412 }
2413 
2414 //===----------------------------------------------------------------------===//
2415 //  Inline Asm Support
2416 //===----------------------------------------------------------------------===//
2417 
2418 AVRTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const2419 AVRTargetLowering::getConstraintType(StringRef Constraint) const {
2420   if (Constraint.size() == 1) {
2421     // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
2422     switch (Constraint[0]) {
2423     default:
2424       break;
2425     case 'a': // Simple upper registers
2426     case 'b': // Base pointer registers pairs
2427     case 'd': // Upper register
2428     case 'l': // Lower registers
2429     case 'e': // Pointer register pairs
2430     case 'q': // Stack pointer register
2431     case 'r': // Any register
2432     case 'w': // Special upper register pairs
2433       return C_RegisterClass;
2434     case 't': // Temporary register
2435     case 'x':
2436     case 'X': // Pointer register pair X
2437     case 'y':
2438     case 'Y': // Pointer register pair Y
2439     case 'z':
2440     case 'Z': // Pointer register pair Z
2441       return C_Register;
2442     case 'Q': // A memory address based on Y or Z pointer with displacement.
2443       return C_Memory;
2444     case 'G': // Floating point constant
2445     case 'I': // 6-bit positive integer constant
2446     case 'J': // 6-bit negative integer constant
2447     case 'K': // Integer constant (Range: 2)
2448     case 'L': // Integer constant (Range: 0)
2449     case 'M': // 8-bit integer constant
2450     case 'N': // Integer constant (Range: -1)
2451     case 'O': // Integer constant (Range: 8, 16, 24)
2452     case 'P': // Integer constant (Range: 1)
2453     case 'R': // Integer constant (Range: -6 to 5)x
2454       return C_Immediate;
2455     }
2456   }
2457 
2458   return TargetLowering::getConstraintType(Constraint);
2459 }
2460 
2461 InlineAsm::ConstraintCode
getInlineAsmMemConstraint(StringRef ConstraintCode) const2462 AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
2463   // Not sure if this is actually the right thing to do, but we got to do
2464   // *something* [agnat]
2465   switch (ConstraintCode[0]) {
2466   case 'Q':
2467     return InlineAsm::ConstraintCode::Q;
2468   }
2469   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
2470 }
2471 
2472 AVRTargetLowering::ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo & info,const char * constraint) const2473 AVRTargetLowering::getSingleConstraintMatchWeight(
2474     AsmOperandInfo &info, const char *constraint) const {
2475   ConstraintWeight weight = CW_Invalid;
2476   Value *CallOperandVal = info.CallOperandVal;
2477 
2478   // If we don't have a value, we can't do a match,
2479   // but allow it at the lowest weight.
2480   // (this behaviour has been copied from the ARM backend)
2481   if (!CallOperandVal) {
2482     return CW_Default;
2483   }
2484 
2485   // Look at the constraint type.
2486   switch (*constraint) {
2487   default:
2488     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
2489     break;
2490   case 'd':
2491   case 'r':
2492   case 'l':
2493     weight = CW_Register;
2494     break;
2495   case 'a':
2496   case 'b':
2497   case 'e':
2498   case 'q':
2499   case 't':
2500   case 'w':
2501   case 'x':
2502   case 'X':
2503   case 'y':
2504   case 'Y':
2505   case 'z':
2506   case 'Z':
2507     weight = CW_SpecificReg;
2508     break;
2509   case 'G':
2510     if (const ConstantFP *C = dyn_cast<ConstantFP>(CallOperandVal)) {
2511       if (C->isZero()) {
2512         weight = CW_Constant;
2513       }
2514     }
2515     break;
2516   case 'I':
2517     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2518       if (isUInt<6>(C->getZExtValue())) {
2519         weight = CW_Constant;
2520       }
2521     }
2522     break;
2523   case 'J':
2524     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2525       if ((C->getSExtValue() >= -63) && (C->getSExtValue() <= 0)) {
2526         weight = CW_Constant;
2527       }
2528     }
2529     break;
2530   case 'K':
2531     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2532       if (C->getZExtValue() == 2) {
2533         weight = CW_Constant;
2534       }
2535     }
2536     break;
2537   case 'L':
2538     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2539       if (C->getZExtValue() == 0) {
2540         weight = CW_Constant;
2541       }
2542     }
2543     break;
2544   case 'M':
2545     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2546       if (isUInt<8>(C->getZExtValue())) {
2547         weight = CW_Constant;
2548       }
2549     }
2550     break;
2551   case 'N':
2552     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2553       if (C->getSExtValue() == -1) {
2554         weight = CW_Constant;
2555       }
2556     }
2557     break;
2558   case 'O':
2559     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2560       if ((C->getZExtValue() == 8) || (C->getZExtValue() == 16) ||
2561           (C->getZExtValue() == 24)) {
2562         weight = CW_Constant;
2563       }
2564     }
2565     break;
2566   case 'P':
2567     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2568       if (C->getZExtValue() == 1) {
2569         weight = CW_Constant;
2570       }
2571     }
2572     break;
2573   case 'R':
2574     if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2575       if ((C->getSExtValue() >= -6) && (C->getSExtValue() <= 5)) {
2576         weight = CW_Constant;
2577       }
2578     }
2579     break;
2580   case 'Q':
2581     weight = CW_Memory;
2582     break;
2583   }
2584 
2585   return weight;
2586 }
2587 
2588 std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const2589 AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2590                                                 StringRef Constraint,
2591                                                 MVT VT) const {
2592   if (Constraint.size() == 1) {
2593     switch (Constraint[0]) {
2594     case 'a': // Simple upper registers r16..r23.
2595       if (VT == MVT::i8)
2596         return std::make_pair(0U, &AVR::LD8loRegClass);
2597       else if (VT == MVT::i16)
2598         return std::make_pair(0U, &AVR::DREGSLD8loRegClass);
2599       break;
2600     case 'b': // Base pointer registers: y, z.
2601       if (VT == MVT::i8 || VT == MVT::i16)
2602         return std::make_pair(0U, &AVR::PTRDISPREGSRegClass);
2603       break;
2604     case 'd': // Upper registers r16..r31.
2605       if (VT == MVT::i8)
2606         return std::make_pair(0U, &AVR::LD8RegClass);
2607       else if (VT == MVT::i16)
2608         return std::make_pair(0U, &AVR::DLDREGSRegClass);
2609       break;
2610     case 'l': // Lower registers r0..r15.
2611       if (VT == MVT::i8)
2612         return std::make_pair(0U, &AVR::GPR8loRegClass);
2613       else if (VT == MVT::i16)
2614         return std::make_pair(0U, &AVR::DREGSloRegClass);
2615       break;
2616     case 'e': // Pointer register pairs: x, y, z.
2617       if (VT == MVT::i8 || VT == MVT::i16)
2618         return std::make_pair(0U, &AVR::PTRREGSRegClass);
2619       break;
2620     case 'q': // Stack pointer register: SPH:SPL.
2621       return std::make_pair(0U, &AVR::GPRSPRegClass);
2622     case 'r': // Any register: r0..r31.
2623       if (VT == MVT::i8)
2624         return std::make_pair(0U, &AVR::GPR8RegClass);
2625       else if (VT == MVT::i16)
2626         return std::make_pair(0U, &AVR::DREGSRegClass);
2627       break;
2628     case 't': // Temporary register: r0.
2629       if (VT == MVT::i8)
2630         return std::make_pair(unsigned(Subtarget.getTmpRegister()),
2631                               &AVR::GPR8RegClass);
2632       break;
2633     case 'w': // Special upper register pairs: r24, r26, r28, r30.
2634       if (VT == MVT::i8 || VT == MVT::i16)
2635         return std::make_pair(0U, &AVR::IWREGSRegClass);
2636       break;
2637     case 'x': // Pointer register pair X: r27:r26.
2638     case 'X':
2639       if (VT == MVT::i8 || VT == MVT::i16)
2640         return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);
2641       break;
2642     case 'y': // Pointer register pair Y: r29:r28.
2643     case 'Y':
2644       if (VT == MVT::i8 || VT == MVT::i16)
2645         return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);
2646       break;
2647     case 'z': // Pointer register pair Z: r31:r30.
2648     case 'Z':
2649       if (VT == MVT::i8 || VT == MVT::i16)
2650         return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);
2651       break;
2652     default:
2653       break;
2654     }
2655   }
2656 
2657   return TargetLowering::getRegForInlineAsmConstraint(
2658       Subtarget.getRegisterInfo(), Constraint, VT);
2659 }
2660 
// Lower an inline-asm operand against a single-letter immediate constraint.
// On a match, the concrete target constant is appended to \p Ops; on a
// mismatch nothing is appended (returning with Ops unchanged signals the
// failure to the caller). Unhandled constraints are forwarded to the generic
// TargetLowering implementation.
void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();

  // Currently only support length 1 constraints.
  if (Constraint.size() != 1) {
    return;
  }

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default:
    break;
  // Deal with integers first:
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P':
  case 'R': {
    // All of these constraints require a compile-time integer constant.
    const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C) {
      return;
    }

    // Signed and unsigned views of the same constant; each constraint below
    // uses whichever interpretation its range is defined in.
    int64_t CVal64 = C->getSExtValue();
    uint64_t CUVal64 = C->getZExtValue();
    switch (ConstraintLetter) {
    case 'I': // 0..63
      if (!isUInt<6>(CUVal64))
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'J': // -63..0
      if (CVal64 < -63 || CVal64 > 0)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    case 'K': // 2
      if (CUVal64 != 2)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'L': // 0
      if (CUVal64 != 0)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'M': // 0..255
      if (!isUInt<8>(CUVal64))
        return;
      // i8 type may be printed as a negative number,
      // e.g. 254 would be printed as -2,
      // so we force it to i16 at least.
      if (Ty.getSimpleVT() == MVT::i8) {
        Ty = MVT::i16;
      }
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'N': // -1
      if (CVal64 != -1)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    case 'O': // 8, 16, 24
      if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'P': // 1
      if (CUVal64 != 1)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'R': // -6..5
      if (CVal64 < -6 || CVal64 > 5)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    }

    break;
  }
  case 'G':
    // Floating point constant constraint: only +0.0 is accepted.
    const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);
    if (!FC || !FC->isZero())
      return;
    // Soften float to i8 0
    Result = DAG.getTargetConstant(0, DL, MVT::i8);
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Constraint letter not handled here (the `default` case above): let the
  // generic lowering have a go at it.
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
2767 
getRegisterByName(const char * RegName,LLT VT,const MachineFunction & MF) const2768 Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,
2769                                               const MachineFunction &MF) const {
2770   Register Reg;
2771 
2772   if (VT == LLT::scalar(8)) {
2773     Reg = StringSwitch<unsigned>(RegName)
2774               .Case("r0", AVR::R0)
2775               .Case("r1", AVR::R1)
2776               .Default(0);
2777   } else {
2778     Reg = StringSwitch<unsigned>(RegName)
2779               .Case("r0", AVR::R1R0)
2780               .Case("sp", AVR::SP)
2781               .Default(0);
2782   }
2783 
2784   if (Reg)
2785     return Reg;
2786 
2787   report_fatal_error(
2788       Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
2789 }
2790 
2791 } // end of namespace llvm
2792