1 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the PowerPC-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // PPCGenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCCallingConv.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/FastISel.h"
23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetLowering.h"
29 #include "llvm/IR/CallingConv.h"
30 #include "llvm/IR/GetElementPtrTypeIterator.h"
31 #include "llvm/IR/GlobalVariable.h"
32 #include "llvm/IR/Operator.h"
33 #include "llvm/Target/TargetMachine.h"
34
35 //===----------------------------------------------------------------------===//
36 //
37 // TBD:
38 // fastLowerArguments: Handle simple cases.
39 // PPCMaterializeGV: Handle TLS.
40 // SelectCall: Handle function pointers.
41 // SelectCall: Handle multi-register return values.
42 // SelectCall: Optimize away nops for local calls.
43 // processCallArgs: Handle bit-converted arguments.
44 // finishCall: Handle multi-register return values.
45 // PPCComputeAddress: Handle parameter references as FrameIndex's.
46 // PPCEmitCmp: Handle immediate as operand 1.
47 // SelectCall: Handle small byval arguments.
48 // SelectIntrinsicCall: Implement.
49 // SelectSelect: Implement.
50 // Consider factoring isTypeLegal into the base class.
51 // Implement switches and jump tables.
52 //
53 //===----------------------------------------------------------------------===//
54 using namespace llvm;
55
56 #define DEBUG_TYPE "ppcfastisel"
57
58 namespace {
59
/// Describes a memory operand as a base plus a signed displacement. The base
/// is either a register or a stack frame index, as selected by BaseType.
struct Address {
  /// Says which member of Base is the meaningful one.
  enum { RegBase, FrameIndexBase } BaseType = RegBase;

  /// The base of the address; interpret according to BaseType.
  union {
    unsigned Reg;
    int FI;
  } Base;

  /// Displacement that is added to the base.
  int64_t Offset = 0;

  /// Innocuous defaults: register base, register number 0, zero offset.
  Address() { Base.Reg = 0; }
};
79
80 class PPCFastISel final : public FastISel {
81
82 const TargetMachine &TM;
83 const PPCSubtarget *Subtarget;
84 PPCFunctionInfo *PPCFuncInfo;
85 const TargetInstrInfo &TII;
86 const TargetLowering &TLI;
87 LLVMContext *Context;
88
89 public:
PPCFastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)90 explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
91 const TargetLibraryInfo *LibInfo)
92 : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
93 Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
94 PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
95 TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
96 Context(&FuncInfo.Fn->getContext()) {}
97
98 // Backend specific FastISel code.
99 private:
100 bool fastSelectInstruction(const Instruction *I) override;
101 Register fastMaterializeConstant(const Constant *C) override;
102 Register fastMaterializeAlloca(const AllocaInst *AI) override;
103 bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
104 const LoadInst *LI) override;
105 bool fastLowerArguments() override;
106 Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
107 Register fastEmitInst_ri(unsigned MachineInstOpcode,
108 const TargetRegisterClass *RC, Register Op0,
109 uint64_t Imm);
110 Register fastEmitInst_r(unsigned MachineInstOpcode,
111 const TargetRegisterClass *RC, Register Op0);
112 Register fastEmitInst_rr(unsigned MachineInstOpcode,
113 const TargetRegisterClass *RC, Register Op0,
114 Register Op1);
115
116 bool fastLowerCall(CallLoweringInfo &CLI) override;
117
118 // Instruction selection routines.
119 private:
120 bool SelectLoad(const Instruction *I);
121 bool SelectStore(const Instruction *I);
122 bool SelectBranch(const Instruction *I);
123 bool SelectIndirectBr(const Instruction *I);
124 bool SelectFPExt(const Instruction *I);
125 bool SelectFPTrunc(const Instruction *I);
126 bool SelectIToFP(const Instruction *I, bool IsSigned);
127 bool SelectFPToI(const Instruction *I, bool IsSigned);
128 bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
129 bool SelectRet(const Instruction *I);
130 bool SelectTrunc(const Instruction *I);
131 bool SelectIntExt(const Instruction *I);
132
133 // Utility routines.
134 private:
135 bool isTypeLegal(Type *Ty, MVT &VT);
136 bool isLoadTypeLegal(Type *Ty, MVT &VT);
137 bool isValueAvailable(const Value *V) const;
isVSFRCRegClass(const TargetRegisterClass * RC) const138 bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
139 return RC->getID() == PPC::VSFRCRegClassID;
140 }
isVSSRCRegClass(const TargetRegisterClass * RC) const141 bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
142 return RC->getID() == PPC::VSSRCRegClassID;
143 }
copyRegToRegClass(const TargetRegisterClass * ToRC,Register SrcReg,unsigned Flag=0,unsigned SubReg=0)144 Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
145 unsigned Flag = 0, unsigned SubReg = 0) {
146 Register TmpReg = createResultReg(ToRC);
147 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
148 TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
149 return TmpReg;
150 }
151 bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
152 Register DestReg, const PPC::Predicate Pred);
153 bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
154 const TargetRegisterClass *RC, bool IsZExt = true,
155 unsigned FP64LoadOpc = PPC::LFD);
156 bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
157 bool PPCComputeAddress(const Value *Obj, Address &Addr);
158 void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
159 bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
160 bool IsZExt);
161 Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
162 Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
163 Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
164 bool UseSExt = true);
165 Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
166 Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
167 Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
168 bool IsSigned);
169 Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);
170
171 // Call handling routines.
172 private:
173 bool processCallArgs(SmallVectorImpl<Value *> &Args,
174 SmallVectorImpl<Register> &ArgRegs,
175 SmallVectorImpl<MVT> &ArgVTs,
176 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
177 SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
178 unsigned &NumBytes, bool IsVarArg);
179 bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
180
181 private:
182 #include "PPCGenFastISel.inc"
183
184 };
185
186 } // end anonymous namespace
187
getComparePred(CmpInst::Predicate Pred)188 static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
189 switch (Pred) {
190 // These are not representable with any single compare.
191 case CmpInst::FCMP_FALSE:
192 case CmpInst::FCMP_TRUE:
193 // Major concern about the following 6 cases is NaN result. The comparison
194 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
195 // only one of which will be set. The result is generated by fcmpu
196 // instruction. However, bc instruction only inspects one of the first 3
197 // bits, so when un is set, bc instruction may jump to an undesired
198 // place.
199 //
200 // More specifically, if we expect an unordered comparison and un is set, we
201 // expect to always go to true branch; in such case UEQ, UGT and ULT still
202 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
203 // since they are tested by inspecting !eq, !lt, !gt, respectively.
204 //
205 // Similarly, for ordered comparison, when un is set, we always expect the
206 // result to be false. In such case OGT, OLT and OEQ is good, since they are
207 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
208 // and ONE are tested through !lt, !gt and !eq, and these are true.
209 case CmpInst::FCMP_UEQ:
210 case CmpInst::FCMP_UGT:
211 case CmpInst::FCMP_ULT:
212 case CmpInst::FCMP_OGE:
213 case CmpInst::FCMP_OLE:
214 case CmpInst::FCMP_ONE:
215 default:
216 return std::nullopt;
217
218 case CmpInst::FCMP_OEQ:
219 case CmpInst::ICMP_EQ:
220 return PPC::PRED_EQ;
221
222 case CmpInst::FCMP_OGT:
223 case CmpInst::ICMP_UGT:
224 case CmpInst::ICMP_SGT:
225 return PPC::PRED_GT;
226
227 case CmpInst::FCMP_UGE:
228 case CmpInst::ICMP_UGE:
229 case CmpInst::ICMP_SGE:
230 return PPC::PRED_GE;
231
232 case CmpInst::FCMP_OLT:
233 case CmpInst::ICMP_ULT:
234 case CmpInst::ICMP_SLT:
235 return PPC::PRED_LT;
236
237 case CmpInst::FCMP_ULE:
238 case CmpInst::ICMP_ULE:
239 case CmpInst::ICMP_SLE:
240 return PPC::PRED_LE;
241
242 case CmpInst::FCMP_UNE:
243 case CmpInst::ICMP_NE:
244 return PPC::PRED_NE;
245
246 case CmpInst::FCMP_ORD:
247 return PPC::PRED_NU;
248
249 case CmpInst::FCMP_UNO:
250 return PPC::PRED_UN;
251 }
252 }
253
254 // Determine whether the type Ty is simple enough to be handled by
255 // fast-isel, and return its equivalent machine type in VT.
256 // FIXME: Copied directly from ARM -- factor into base class?
isTypeLegal(Type * Ty,MVT & VT)257 bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
258 EVT Evt = TLI.getValueType(DL, Ty, true);
259
260 // Only handle simple types.
261 if (Evt == MVT::Other || !Evt.isSimple()) return false;
262 VT = Evt.getSimpleVT();
263
264 // Handle all legal types, i.e. a register that will directly hold this
265 // value.
266 return TLI.isTypeLegal(VT);
267 }
268
269 // Determine whether the type Ty is simple enough to be handled by
270 // fast-isel as a load target, and return its equivalent machine type in VT.
isLoadTypeLegal(Type * Ty,MVT & VT)271 bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
272 if (isTypeLegal(Ty, VT)) return true;
273
274 // If this is a type than can be sign or zero-extended to a basic operation
275 // go ahead and accept it now.
276 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
277 return true;
278 }
279
280 return false;
281 }
282
isValueAvailable(const Value * V) const283 bool PPCFastISel::isValueAvailable(const Value *V) const {
284 if (!isa<Instruction>(V))
285 return true;
286
287 const auto *I = cast<Instruction>(V);
288 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
289 }
290
291 // Given a value Obj, create an Address object Addr that represents its
292 // address. Return false if we can't handle it.
PPCComputeAddress(const Value * Obj,Address & Addr)293 bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
294 const User *U = nullptr;
295 unsigned Opcode = Instruction::UserOp1;
296 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
297 // Don't walk into other basic blocks unless the object is an alloca from
298 // another block, otherwise it may not have a virtual register assigned.
299 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
300 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
301 Opcode = I->getOpcode();
302 U = I;
303 }
304 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
305 Opcode = C->getOpcode();
306 U = C;
307 }
308
309 switch (Opcode) {
310 default:
311 break;
312 case Instruction::BitCast:
313 // Look through bitcasts.
314 return PPCComputeAddress(U->getOperand(0), Addr);
315 case Instruction::IntToPtr:
316 // Look past no-op inttoptrs.
317 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
318 TLI.getPointerTy(DL))
319 return PPCComputeAddress(U->getOperand(0), Addr);
320 break;
321 case Instruction::PtrToInt:
322 // Look past no-op ptrtoints.
323 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
324 return PPCComputeAddress(U->getOperand(0), Addr);
325 break;
326 case Instruction::GetElementPtr: {
327 Address SavedAddr = Addr;
328 int64_t TmpOffset = Addr.Offset;
329
330 // Iterate through the GEP folding the constants into offsets where
331 // we can.
332 gep_type_iterator GTI = gep_type_begin(U);
333 for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
334 II != IE; ++II, ++GTI) {
335 const Value *Op = *II;
336 if (StructType *STy = GTI.getStructTypeOrNull()) {
337 const StructLayout *SL = DL.getStructLayout(STy);
338 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
339 TmpOffset += SL->getElementOffset(Idx);
340 } else {
341 uint64_t S = GTI.getSequentialElementStride(DL);
342 for (;;) {
343 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
344 // Constant-offset addressing.
345 TmpOffset += CI->getSExtValue() * S;
346 break;
347 }
348 if (canFoldAddIntoGEP(U, Op)) {
349 // A compatible add with a constant operand. Fold the constant.
350 ConstantInt *CI =
351 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
352 TmpOffset += CI->getSExtValue() * S;
353 // Iterate on the other operand.
354 Op = cast<AddOperator>(Op)->getOperand(0);
355 continue;
356 }
357 // Unsupported
358 goto unsupported_gep;
359 }
360 }
361 }
362
363 // Try to grab the base operand now.
364 Addr.Offset = TmpOffset;
365 if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
366
367 // We failed, restore everything and try the other options.
368 Addr = SavedAddr;
369
370 unsupported_gep:
371 break;
372 }
373 case Instruction::Alloca: {
374 const AllocaInst *AI = cast<AllocaInst>(Obj);
375 DenseMap<const AllocaInst*, int>::iterator SI =
376 FuncInfo.StaticAllocaMap.find(AI);
377 if (SI != FuncInfo.StaticAllocaMap.end()) {
378 Addr.BaseType = Address::FrameIndexBase;
379 Addr.Base.FI = SI->second;
380 return true;
381 }
382 break;
383 }
384 }
385
386 // FIXME: References to parameters fall through to the behavior
387 // below. They should be able to reference a frame index since
388 // they are stored to the stack, so we can get "ld rx, offset(r1)"
389 // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
390 // just contain the parameter. Try to handle this with a FI.
391
392 // Try to get this in a register if nothing else has worked.
393 if (Addr.Base.Reg == 0)
394 Addr.Base.Reg = getRegForValue(Obj);
395
396 // Prevent assignment of base register to X0, which is inappropriate
397 // for loads and stores alike.
398 if (Addr.Base.Reg != 0)
399 MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
400
401 return Addr.Base.Reg != 0;
402 }
403
404 // Fix up some addresses that can't be used directly. For example, if
405 // an offset won't fit in an instruction field, we may need to move it
406 // into an index register.
PPCSimplifyAddress(Address & Addr,bool & UseOffset,Register & IndexReg)407 void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
408 Register &IndexReg) {
409
410 // Check whether the offset fits in the instruction field.
411 if (!isInt<16>(Addr.Offset))
412 UseOffset = false;
413
414 // If this is a stack pointer and the offset needs to be simplified then
415 // put the alloca address into a register, set the base type back to
416 // register and continue. This should almost never happen.
417 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
418 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
419 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
420 ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
421 Addr.Base.Reg = ResultReg;
422 Addr.BaseType = Address::RegBase;
423 }
424
425 if (!UseOffset) {
426 IntegerType *OffsetTy = Type::getInt64Ty(*Context);
427 const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
428 IndexReg = PPCMaterializeInt(Offset, MVT::i64);
429 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
430 }
431 }
432
433 // Emit a load instruction if possible, returning true if we succeeded,
434 // otherwise false. See commentary below for how the register class of
435 // the load is determined.
PPCEmitLoad(MVT VT,Register & ResultReg,Address & Addr,const TargetRegisterClass * RC,bool IsZExt,unsigned FP64LoadOpc)436 bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
437 const TargetRegisterClass *RC,
438 bool IsZExt, unsigned FP64LoadOpc) {
439 unsigned Opc;
440 bool UseOffset = true;
441 bool HasSPE = Subtarget->hasSPE();
442
443 // If ResultReg is given, it determines the register class of the load.
444 // Otherwise, RC is the register class to use. If the result of the
445 // load isn't anticipated in this block, both may be zero, in which
446 // case we must make a conservative guess. In particular, don't assign
447 // R0 or X0 to the result register, as the result may be used in a load,
448 // store, add-immediate, or isel that won't permit this. (Though
449 // perhaps the spill and reload of live-exit values would handle this?)
450 const TargetRegisterClass *UseRC =
451 (ResultReg ? MRI.getRegClass(ResultReg) :
452 (RC ? RC :
453 (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
454 (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
455 (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
456 &PPC::GPRC_and_GPRC_NOR0RegClass)))));
457
458 bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
459
460 switch (VT.SimpleTy) {
461 default: // e.g., vector types not handled
462 return false;
463 case MVT::i8:
464 Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
465 break;
466 case MVT::i16:
467 Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
468 : (Is32BitInt ? PPC::LHA : PPC::LHA8));
469 break;
470 case MVT::i32:
471 Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
472 : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
473 if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
474 UseOffset = false;
475 break;
476 case MVT::i64:
477 Opc = PPC::LD;
478 assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
479 "64-bit load with 32-bit target??");
480 UseOffset = ((Addr.Offset & 3) == 0);
481 break;
482 case MVT::f32:
483 Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
484 break;
485 case MVT::f64:
486 Opc = FP64LoadOpc;
487 break;
488 }
489
490 // If necessary, materialize the offset into a register and use
491 // the indexed form. Also handle stack pointers with special needs.
492 Register IndexReg;
493 PPCSimplifyAddress(Addr, UseOffset, IndexReg);
494
495 // If this is a potential VSX load with an offset of 0, a VSX indexed load can
496 // be used.
497 bool IsVSSRC = isVSSRCRegClass(UseRC);
498 bool IsVSFRC = isVSFRCRegClass(UseRC);
499 bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
500 bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
501 if ((Is32VSXLoad || Is64VSXLoad) &&
502 (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
503 (Addr.Offset == 0)) {
504 UseOffset = false;
505 }
506
507 if (!ResultReg)
508 ResultReg = createResultReg(UseRC);
509
510 // Note: If we still have a frame index here, we know the offset is
511 // in range, as otherwise PPCSimplifyAddress would have converted it
512 // into a RegBase.
513 if (Addr.BaseType == Address::FrameIndexBase) {
514 // VSX only provides an indexed load.
515 if (Is32VSXLoad || Is64VSXLoad) return false;
516
517 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
518 MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
519 Addr.Offset),
520 MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
521 MFI.getObjectAlign(Addr.Base.FI));
522
523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
524 .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
525
526 // Base reg with offset in range.
527 } else if (UseOffset) {
528 // VSX only provides an indexed load.
529 if (Is32VSXLoad || Is64VSXLoad) return false;
530
531 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
532 .addImm(Addr.Offset).addReg(Addr.Base.Reg);
533
534 // Indexed form.
535 } else {
536 // Get the RR opcode corresponding to the RI one. FIXME: It would be
537 // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
538 // is hard to get at.
539 switch (Opc) {
540 default: llvm_unreachable("Unexpected opcode!");
541 case PPC::LBZ: Opc = PPC::LBZX; break;
542 case PPC::LBZ8: Opc = PPC::LBZX8; break;
543 case PPC::LHZ: Opc = PPC::LHZX; break;
544 case PPC::LHZ8: Opc = PPC::LHZX8; break;
545 case PPC::LHA: Opc = PPC::LHAX; break;
546 case PPC::LHA8: Opc = PPC::LHAX8; break;
547 case PPC::LWZ: Opc = PPC::LWZX; break;
548 case PPC::LWZ8: Opc = PPC::LWZX8; break;
549 case PPC::LWA: Opc = PPC::LWAX; break;
550 case PPC::LWA_32: Opc = PPC::LWAX_32; break;
551 case PPC::LD: Opc = PPC::LDX; break;
552 case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
553 case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
554 case PPC::EVLDD: Opc = PPC::EVLDDX; break;
555 case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
556 }
557
558 auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
559 ResultReg);
560
561 // If we have an index register defined we use it in the store inst,
562 // otherwise we use X0 as base as it makes the vector instructions to
563 // use zero in the computation of the effective address regardless the
564 // content of the register.
565 if (IndexReg)
566 MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
567 else
568 MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
569 }
570
571 return true;
572 }
573
574 // Attempt to fast-select a load instruction.
SelectLoad(const Instruction * I)575 bool PPCFastISel::SelectLoad(const Instruction *I) {
576 // FIXME: No atomic loads are supported.
577 if (cast<LoadInst>(I)->isAtomic())
578 return false;
579
580 // Verify we have a legal type before going any further.
581 MVT VT;
582 if (!isLoadTypeLegal(I->getType(), VT))
583 return false;
584
585 // See if we can handle this address.
586 Address Addr;
587 if (!PPCComputeAddress(I->getOperand(0), Addr))
588 return false;
589
590 // Look at the currently assigned register for this instruction
591 // to determine the required register class. This is necessary
592 // to constrain RA from using R0/X0 when this is not legal.
593 Register AssignedReg = FuncInfo.ValueMap[I];
594 const TargetRegisterClass *RC =
595 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
596
597 Register ResultReg = 0;
598 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
599 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
600 return false;
601 updateValueMap(I, ResultReg);
602 return true;
603 }
604
605 // Emit a store instruction to store SrcReg at Addr.
PPCEmitStore(MVT VT,Register SrcReg,Address & Addr)606 bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
607 assert(SrcReg && "Nothing to store!");
608 unsigned Opc;
609 bool UseOffset = true;
610
611 const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
612 bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
613
614 switch (VT.SimpleTy) {
615 default: // e.g., vector types not handled
616 return false;
617 case MVT::i8:
618 Opc = Is32BitInt ? PPC::STB : PPC::STB8;
619 break;
620 case MVT::i16:
621 Opc = Is32BitInt ? PPC::STH : PPC::STH8;
622 break;
623 case MVT::i32:
624 assert(Is32BitInt && "Not GPRC for i32??");
625 Opc = PPC::STW;
626 break;
627 case MVT::i64:
628 Opc = PPC::STD;
629 UseOffset = ((Addr.Offset & 3) == 0);
630 break;
631 case MVT::f32:
632 Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
633 break;
634 case MVT::f64:
635 Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
636 break;
637 }
638
639 // If necessary, materialize the offset into a register and use
640 // the indexed form. Also handle stack pointers with special needs.
641 Register IndexReg;
642 PPCSimplifyAddress(Addr, UseOffset, IndexReg);
643
644 // If this is a potential VSX store with an offset of 0, a VSX indexed store
645 // can be used.
646 bool IsVSSRC = isVSSRCRegClass(RC);
647 bool IsVSFRC = isVSFRCRegClass(RC);
648 bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
649 bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
650 if ((Is32VSXStore || Is64VSXStore) &&
651 (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
652 (Addr.Offset == 0)) {
653 UseOffset = false;
654 }
655
656 // Note: If we still have a frame index here, we know the offset is
657 // in range, as otherwise PPCSimplifyAddress would have converted it
658 // into a RegBase.
659 if (Addr.BaseType == Address::FrameIndexBase) {
660 // VSX only provides an indexed store.
661 if (Is32VSXStore || Is64VSXStore) return false;
662
663 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
664 MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
665 Addr.Offset),
666 MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
667 MFI.getObjectAlign(Addr.Base.FI));
668
669 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
670 .addReg(SrcReg)
671 .addImm(Addr.Offset)
672 .addFrameIndex(Addr.Base.FI)
673 .addMemOperand(MMO);
674
675 // Base reg with offset in range.
676 } else if (UseOffset) {
677 // VSX only provides an indexed store.
678 if (Is32VSXStore || Is64VSXStore)
679 return false;
680
681 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
682 .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
683
684 // Indexed form.
685 } else {
686 // Get the RR opcode corresponding to the RI one. FIXME: It would be
687 // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
688 // is hard to get at.
689 switch (Opc) {
690 default: llvm_unreachable("Unexpected opcode!");
691 case PPC::STB: Opc = PPC::STBX; break;
692 case PPC::STH : Opc = PPC::STHX; break;
693 case PPC::STW : Opc = PPC::STWX; break;
694 case PPC::STB8: Opc = PPC::STBX8; break;
695 case PPC::STH8: Opc = PPC::STHX8; break;
696 case PPC::STW8: Opc = PPC::STWX8; break;
697 case PPC::STD: Opc = PPC::STDX; break;
698 case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
699 case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
700 case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
701 case PPC::SPESTW: Opc = PPC::SPESTWX; break;
702 }
703
704 auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
705 .addReg(SrcReg);
706
707 // If we have an index register defined we use it in the store inst,
708 // otherwise we use X0 as base as it makes the vector instructions to
709 // use zero in the computation of the effective address regardless the
710 // content of the register.
711 if (IndexReg)
712 MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
713 else
714 MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
715 }
716
717 return true;
718 }
719
720 // Attempt to fast-select a store instruction.
SelectStore(const Instruction * I)721 bool PPCFastISel::SelectStore(const Instruction *I) {
722 Value *Op0 = I->getOperand(0);
723 Register SrcReg;
724
725 // FIXME: No atomics loads are supported.
726 if (cast<StoreInst>(I)->isAtomic())
727 return false;
728
729 // Verify we have a legal type before going any further.
730 MVT VT;
731 if (!isLoadTypeLegal(Op0->getType(), VT))
732 return false;
733
734 // Get the value to be stored into a register.
735 SrcReg = getRegForValue(Op0);
736 if (!SrcReg)
737 return false;
738
739 // See if we can handle this address.
740 Address Addr;
741 if (!PPCComputeAddress(I->getOperand(1), Addr))
742 return false;
743
744 if (!PPCEmitStore(VT, SrcReg, Addr))
745 return false;
746
747 return true;
748 }
749
750 // Attempt to fast-select a branch instruction.
SelectBranch(const Instruction * I)751 bool PPCFastISel::SelectBranch(const Instruction *I) {
752 const BranchInst *BI = cast<BranchInst>(I);
753 MachineBasicBlock *BrBB = FuncInfo.MBB;
754 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
755 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
756
757 // For now, just try the simplest case where it's fed by a compare.
758 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
759 if (isValueAvailable(CI)) {
760 std::optional<PPC::Predicate> OptPPCPred =
761 getComparePred(CI->getPredicate());
762 if (!OptPPCPred)
763 return false;
764
765 PPC::Predicate PPCPred = *OptPPCPred;
766
767 // Take advantage of fall-through opportunities.
768 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
769 std::swap(TBB, FBB);
770 PPCPred = PPC::InvertPredicate(PPCPred);
771 }
772
773 Register CondReg = createResultReg(&PPC::CRRCRegClass);
774
775 if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
776 CondReg, PPCPred))
777 return false;
778
779 BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
780 .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
781 .addReg(CondReg)
782 .addMBB(TBB);
783 finishCondBranch(BI->getParent(), TBB, FBB);
784 return true;
785 }
786 } else if (const ConstantInt *CI =
787 dyn_cast<ConstantInt>(BI->getCondition())) {
788 uint64_t Imm = CI->getZExtValue();
789 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
790 fastEmitBranch(Target, MIMD.getDL());
791 return true;
792 }
793
794 // FIXME: ARM looks for a case where the block containing the compare
795 // has been split from the block containing the branch. If this happens,
796 // there is a vreg available containing the result of the compare. I'm
797 // not sure we can do much, as we've lost the predicate information with
798 // the compare instruction -- we have a 4-bit CR but don't know which bit
799 // to test here.
800 return false;
801 }
802
803 // Attempt to emit a compare of the two source values. Signed and unsigned
804 // comparisons are supported. Return false if we can't handle it.
PPCEmitCmp(const Value * SrcValue1,const Value * SrcValue2,bool IsZExt,Register DestReg,const PPC::Predicate Pred)805 bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
806 bool IsZExt, Register DestReg,
807 const PPC::Predicate Pred) {
808 Type *Ty = SrcValue1->getType();
809 EVT SrcEVT = TLI.getValueType(DL, Ty, true);
810 if (!SrcEVT.isSimple())
811 return false;
812 MVT SrcVT = SrcEVT.getSimpleVT();
813
814 if (SrcVT == MVT::i1 && Subtarget->useCRBits())
815 return false;
816
817 // See if operand 2 is an immediate encodeable in the compare.
818 // FIXME: Operands are not in canonical order at -O0, so an immediate
819 // operand in position 1 is a lost opportunity for now. We are
820 // similar to ARM in this regard.
821 int64_t Imm = 0;
822 bool UseImm = false;
823 const bool HasSPE = Subtarget->hasSPE();
824
825 // Only 16-bit integer constants can be represented in compares for
826 // PowerPC. Others will be materialized into a register.
827 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
828 if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
829 SrcVT == MVT::i8 || SrcVT == MVT::i1) {
830 const APInt &CIVal = ConstInt->getValue();
831 Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
832 (int64_t)CIVal.getSExtValue();
833 if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
834 UseImm = true;
835 }
836 }
837
838 Register SrcReg1 = getRegForValue(SrcValue1);
839 if (!SrcReg1)
840 return false;
841
842 Register SrcReg2;
843 if (!UseImm) {
844 SrcReg2 = getRegForValue(SrcValue2);
845 if (!SrcReg2)
846 return false;
847 }
848
849 unsigned CmpOpc;
850 bool NeedsExt = false;
851
852 auto RC1 = MRI.getRegClass(SrcReg1);
853 auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;
854
855 switch (SrcVT.SimpleTy) {
856 default: return false;
857 case MVT::f32:
858 if (HasSPE) {
859 switch (Pred) {
860 default: return false;
861 case PPC::PRED_EQ:
862 CmpOpc = PPC::EFSCMPEQ;
863 break;
864 case PPC::PRED_LT:
865 CmpOpc = PPC::EFSCMPLT;
866 break;
867 case PPC::PRED_GT:
868 CmpOpc = PPC::EFSCMPGT;
869 break;
870 }
871 } else {
872 CmpOpc = PPC::FCMPUS;
873 if (isVSSRCRegClass(RC1))
874 SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
875 if (RC2 && isVSSRCRegClass(RC2))
876 SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
877 }
878 break;
879 case MVT::f64:
880 if (HasSPE) {
881 switch (Pred) {
882 default: return false;
883 case PPC::PRED_EQ:
884 CmpOpc = PPC::EFDCMPEQ;
885 break;
886 case PPC::PRED_LT:
887 CmpOpc = PPC::EFDCMPLT;
888 break;
889 case PPC::PRED_GT:
890 CmpOpc = PPC::EFDCMPGT;
891 break;
892 }
893 } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
894 CmpOpc = PPC::XSCMPUDP;
895 } else {
896 CmpOpc = PPC::FCMPUD;
897 }
898 break;
899 case MVT::i1:
900 case MVT::i8:
901 case MVT::i16:
902 NeedsExt = true;
903 [[fallthrough]];
904 case MVT::i32:
905 if (!UseImm)
906 CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
907 else
908 CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
909 break;
910 case MVT::i64:
911 if (!UseImm)
912 CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
913 else
914 CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
915 break;
916 }
917
918 if (NeedsExt) {
919 Register ExtReg = createResultReg(&PPC::GPRCRegClass);
920 if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
921 return false;
922 SrcReg1 = ExtReg;
923
924 if (!UseImm) {
925 Register ExtReg = createResultReg(&PPC::GPRCRegClass);
926 if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
927 return false;
928 SrcReg2 = ExtReg;
929 }
930 }
931
932 if (!UseImm)
933 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
934 .addReg(SrcReg1).addReg(SrcReg2);
935 else
936 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
937 .addReg(SrcReg1).addImm(Imm);
938
939 return true;
940 }
941
942 // Attempt to fast-select a floating-point extend instruction.
SelectFPExt(const Instruction * I)943 bool PPCFastISel::SelectFPExt(const Instruction *I) {
944 Value *Src = I->getOperand(0);
945 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
946 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
947
948 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
949 return false;
950
951 Register SrcReg = getRegForValue(Src);
952 if (!SrcReg)
953 return false;
954
955 // No code is generated for a FP extend.
956 updateValueMap(I, SrcReg);
957 return true;
958 }
959
960 // Attempt to fast-select a floating-point truncate instruction.
SelectFPTrunc(const Instruction * I)961 bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
962 Value *Src = I->getOperand(0);
963 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
964 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
965
966 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
967 return false;
968
969 Register SrcReg = getRegForValue(Src);
970 if (!SrcReg)
971 return false;
972
973 // Round the result to single precision.
974 Register DestReg;
975 auto RC = MRI.getRegClass(SrcReg);
976 if (Subtarget->hasSPE()) {
977 DestReg = createResultReg(&PPC::GPRCRegClass);
978 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
979 DestReg)
980 .addReg(SrcReg);
981 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
982 DestReg = createResultReg(&PPC::VSSRCRegClass);
983 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
984 DestReg)
985 .addReg(SrcReg);
986 } else {
987 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
988 DestReg = createResultReg(&PPC::F4RCRegClass);
989 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
990 TII.get(PPC::FRSP), DestReg)
991 .addReg(SrcReg);
992 }
993
994 updateValueMap(I, DestReg);
995 return true;
996 }
997
998 // Move an i32 or i64 value in a GPR to an f64 value in an FPR.
999 // FIXME: When direct register moves are implemented (see PowerISA 2.07),
1000 // those should be used instead of moving via a stack slot when the
1001 // subtarget permits.
1002 // FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
1003 // stack slot and 4-byte store/load sequence. Or just sext the 4-byte
1004 // case to 8 bytes which produces tighter code but wastes stack space.
PPCMoveToFPReg(MVT SrcVT,Register SrcReg,bool IsSigned)1005 Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
1006 bool IsSigned) {
1007
1008 // If necessary, extend 32-bit int to 64-bit.
1009 if (SrcVT == MVT::i32) {
1010 Register TmpReg = createResultReg(&PPC::G8RCRegClass);
1011 if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
1012 return Register();
1013 SrcReg = TmpReg;
1014 }
1015
1016 // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1017 Address Addr;
1018 Addr.BaseType = Address::FrameIndexBase;
1019 Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
1020
1021 // Store the value from the GPR.
1022 if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
1023 return Register();
1024
1025 // Load the integer value into an FPR. The kind of load used depends
1026 // on a number of conditions.
1027 unsigned LoadOpc = PPC::LFD;
1028
1029 if (SrcVT == MVT::i32) {
1030 if (!IsSigned) {
1031 LoadOpc = PPC::LFIWZX;
1032 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1033 } else if (Subtarget->hasLFIWAX()) {
1034 LoadOpc = PPC::LFIWAX;
1035 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1036 }
1037 }
1038
1039 const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1040 Register ResultReg;
1041 if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
1042 return Register();
1043
1044 return ResultReg;
1045 }
1046
1047 // Attempt to fast-select an integer-to-floating-point conversion.
1048 // FIXME: Once fast-isel has better support for VSX, conversions using
1049 // direct moves should be implemented.
SelectIToFP(const Instruction * I,bool IsSigned)1050 bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
1051 MVT DstVT;
1052 Type *DstTy = I->getType();
1053 if (!isTypeLegal(DstTy, DstVT))
1054 return false;
1055
1056 if (DstVT != MVT::f32 && DstVT != MVT::f64)
1057 return false;
1058
1059 Value *Src = I->getOperand(0);
1060 EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
1061 if (!SrcEVT.isSimple())
1062 return false;
1063
1064 MVT SrcVT = SrcEVT.getSimpleVT();
1065
1066 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
1067 SrcVT != MVT::i32 && SrcVT != MVT::i64)
1068 return false;
1069
1070 Register SrcReg = getRegForValue(Src);
1071 if (!SrcReg)
1072 return false;
1073
1074 // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
1075 if (Subtarget->hasSPE()) {
1076 unsigned Opc;
1077 if (DstVT == MVT::f32)
1078 Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
1079 else
1080 Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
1081
1082 Register DestReg = createResultReg(&PPC::SPERCRegClass);
1083 // Generate the convert.
1084 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1085 .addReg(SrcReg);
1086 updateValueMap(I, DestReg);
1087 return true;
1088 }
1089
1090 // We can only lower an unsigned convert if we have the newer
1091 // floating-point conversion operations.
1092 if (!IsSigned && !Subtarget->hasFPCVT())
1093 return false;
1094
1095 // FIXME: For now we require the newer floating-point conversion operations
1096 // (which are present only on P7 and A2 server models) when converting
1097 // to single-precision float. Otherwise we have to generate a lot of
1098 // fiddly code to avoid double rounding. If necessary, the fiddly code
1099 // can be found in PPCTargetLowering::LowerINT_TO_FP().
1100 if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
1101 return false;
1102
1103 // Extend the input if necessary.
1104 if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
1105 Register TmpReg = createResultReg(&PPC::G8RCRegClass);
1106 if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
1107 return false;
1108 SrcVT = MVT::i64;
1109 SrcReg = TmpReg;
1110 }
1111
1112 // Move the integer value to an FPR.
1113 Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
1114 if (!FPReg)
1115 return false;
1116
1117 // Determine the opcode for the conversion.
1118 const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1119 Register DestReg = createResultReg(RC);
1120 unsigned Opc;
1121
1122 if (DstVT == MVT::f32)
1123 Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
1124 else
1125 Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
1126
1127 // Generate the convert.
1128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1129 .addReg(FPReg);
1130
1131 updateValueMap(I, DestReg);
1132 return true;
1133 }
1134
1135 // Move the floating-point value in SrcReg into an integer destination
1136 // register, and return the register (or zero if we can't handle it).
1137 // FIXME: When direct register moves are implemented (see PowerISA 2.07),
1138 // those should be used instead of moving via a stack slot when the
1139 // subtarget permits.
PPCMoveToIntReg(const Instruction * I,MVT VT,Register SrcReg,bool IsSigned)1140 Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1141 Register SrcReg, bool IsSigned) {
1142 // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1143 // Note that if have STFIWX available, we could use a 4-byte stack
1144 // slot for i32, but this being fast-isel we'll just go with the
1145 // easiest code gen possible.
1146 Address Addr;
1147 Addr.BaseType = Address::FrameIndexBase;
1148 Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
1149
1150 // Store the value from the FPR.
1151 if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
1152 return Register();
1153
1154 // Reload it into a GPR. If we want an i32 on big endian, modify the
1155 // address to have a 4-byte offset so we load from the right place.
1156 if (VT == MVT::i32)
1157 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1158
1159 // Look at the currently assigned register for this instruction
1160 // to determine the required register class.
1161 Register AssignedReg = FuncInfo.ValueMap[I];
1162 const TargetRegisterClass *RC =
1163 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
1164
1165 Register ResultReg;
1166 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
1167 return Register();
1168
1169 return ResultReg;
1170 }
1171
1172 // Attempt to fast-select a floating-point-to-integer conversion.
1173 // FIXME: Once fast-isel has better support for VSX, conversions using
1174 // direct moves should be implemented.
SelectFPToI(const Instruction * I,bool IsSigned)1175 bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
1176 MVT DstVT, SrcVT;
1177 Type *DstTy = I->getType();
1178 if (!isTypeLegal(DstTy, DstVT))
1179 return false;
1180
1181 if (DstVT != MVT::i32 && DstVT != MVT::i64)
1182 return false;
1183
1184 // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
1185 if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
1186 !Subtarget->hasSPE())
1187 return false;
1188
1189 Value *Src = I->getOperand(0);
1190 Type *SrcTy = Src->getType();
1191 if (!isTypeLegal(SrcTy, SrcVT))
1192 return false;
1193
1194 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1195 return false;
1196
1197 Register SrcReg = getRegForValue(Src);
1198 if (!SrcReg)
1199 return false;
1200
1201 // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
1202 // meaningless copy to get the register class right.
1203 const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
1204 if (InRC == &PPC::F4RCRegClass)
1205 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
1206 else if (InRC == &PPC::VSSRCRegClass)
1207 SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
1208
1209 // Determine the opcode for the conversion, which takes place
1210 // entirely within FPRs or VSRs.
1211 Register DestReg;
1212 unsigned Opc;
1213 auto RC = MRI.getRegClass(SrcReg);
1214
1215 if (Subtarget->hasSPE()) {
1216 DestReg = createResultReg(&PPC::GPRCRegClass);
1217 if (IsSigned)
1218 Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
1219 else
1220 Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
1221 } else if (isVSFRCRegClass(RC)) {
1222 DestReg = createResultReg(&PPC::VSFRCRegClass);
1223 if (DstVT == MVT::i32)
1224 Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
1225 else
1226 Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
1227 } else {
1228 DestReg = createResultReg(&PPC::F8RCRegClass);
1229 if (DstVT == MVT::i32)
1230 if (IsSigned)
1231 Opc = PPC::FCTIWZ;
1232 else
1233 Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1234 else
1235 Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1236 }
1237
1238 // Generate the convert.
1239 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1240 .addReg(SrcReg);
1241
1242 // Now move the integer value from a float register to an integer register.
1243 Register IntReg = Subtarget->hasSPE()
1244 ? DestReg
1245 : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
1246
1247 if (!IntReg)
1248 return false;
1249
1250 updateValueMap(I, IntReg);
1251 return true;
1252 }
1253
1254 // Attempt to fast-select a binary integer operation that isn't already
1255 // handled automatically.
SelectBinaryIntOp(const Instruction * I,unsigned ISDOpcode)1256 bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1257 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1258
1259 // We can get here in the case when we have a binary operation on a non-legal
1260 // type and the target independent selector doesn't know how to handle it.
1261 if (DestVT != MVT::i16 && DestVT != MVT::i8)
1262 return false;
1263
1264 // Look at the currently assigned register for this instruction
1265 // to determine the required register class. If there is no register,
1266 // make a conservative choice (don't assign R0).
1267 Register AssignedReg = FuncInfo.ValueMap[I];
1268 const TargetRegisterClass *RC =
1269 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1270 &PPC::GPRC_and_GPRC_NOR0RegClass);
1271 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
1272
1273 unsigned Opc;
1274 switch (ISDOpcode) {
1275 default: return false;
1276 case ISD::ADD:
1277 Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
1278 break;
1279 case ISD::OR:
1280 Opc = IsGPRC ? PPC::OR : PPC::OR8;
1281 break;
1282 case ISD::SUB:
1283 Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
1284 break;
1285 }
1286
1287 Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
1288 Register SrcReg1 = getRegForValue(I->getOperand(0));
1289 if (!SrcReg1)
1290 return false;
1291
1292 // Handle case of small immediate operand.
1293 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
1294 const APInt &CIVal = ConstInt->getValue();
1295 int Imm = (int)CIVal.getSExtValue();
1296 bool UseImm = true;
1297 if (isInt<16>(Imm)) {
1298 switch (Opc) {
1299 default:
1300 llvm_unreachable("Missing case!");
1301 case PPC::ADD4:
1302 Opc = PPC::ADDI;
1303 MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1304 break;
1305 case PPC::ADD8:
1306 Opc = PPC::ADDI8;
1307 MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1308 break;
1309 case PPC::OR:
1310 Opc = PPC::ORI;
1311 break;
1312 case PPC::OR8:
1313 Opc = PPC::ORI8;
1314 break;
1315 case PPC::SUBF:
1316 if (Imm == -32768)
1317 UseImm = false;
1318 else {
1319 Opc = PPC::ADDI;
1320 MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1321 Imm = -Imm;
1322 }
1323 break;
1324 case PPC::SUBF8:
1325 if (Imm == -32768)
1326 UseImm = false;
1327 else {
1328 Opc = PPC::ADDI8;
1329 MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1330 Imm = -Imm;
1331 }
1332 break;
1333 }
1334
1335 if (UseImm) {
1336 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
1337 ResultReg)
1338 .addReg(SrcReg1)
1339 .addImm(Imm);
1340 updateValueMap(I, ResultReg);
1341 return true;
1342 }
1343 }
1344 }
1345
1346 // Reg-reg case.
1347 Register SrcReg2 = getRegForValue(I->getOperand(1));
1348 if (!SrcReg2)
1349 return false;
1350
1351 // Reverse operands for subtract-from.
1352 if (ISDOpcode == ISD::SUB)
1353 std::swap(SrcReg1, SrcReg2);
1354
1355 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
1356 .addReg(SrcReg1).addReg(SrcReg2);
1357 updateValueMap(I, ResultReg);
1358 return true;
1359 }
1360
1361 // Handle arguments to a call that we're attempting to fast-select.
1362 // Return false if the arguments are too complex for us at the moment.
processCallArgs(SmallVectorImpl<Value * > & Args,SmallVectorImpl<Register> & ArgRegs,SmallVectorImpl<MVT> & ArgVTs,SmallVectorImpl<ISD::ArgFlagsTy> & ArgFlags,SmallVectorImpl<unsigned> & RegArgs,CallingConv::ID CC,unsigned & NumBytes,bool IsVarArg)1363 bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
1364 SmallVectorImpl<Register> &ArgRegs,
1365 SmallVectorImpl<MVT> &ArgVTs,
1366 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1367 SmallVectorImpl<unsigned> &RegArgs,
1368 CallingConv::ID CC, unsigned &NumBytes,
1369 bool IsVarArg) {
1370 SmallVector<CCValAssign, 16> ArgLocs;
1371 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1372
1373 // Reserve space for the linkage area on the stack.
1374 unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1375 CCInfo.AllocateStack(LinkageSize, Align(8));
1376
1377 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
1378
1379 // Bail out if we can't handle any of the arguments.
1380 for (const CCValAssign &VA : ArgLocs) {
1381 MVT ArgVT = ArgVTs[VA.getValNo()];
1382
1383 // Skip vector arguments for now, as well as long double and
1384 // uint128_t, and anything that isn't passed in a register.
1385 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1386 !VA.isRegLoc() || VA.needsCustom())
1387 return false;
1388
1389 // Skip bit-converted arguments for now.
1390 if (VA.getLocInfo() == CCValAssign::BCvt)
1391 return false;
1392 }
1393
1394 // Get a count of how many bytes are to be pushed onto the stack.
1395 NumBytes = CCInfo.getStackSize();
1396
1397 // The prolog code of the callee may store up to 8 GPR argument registers to
1398 // the stack, allowing va_start to index over them in memory if its varargs.
1399 // Because we cannot tell if this is needed on the caller side, we have to
1400 // conservatively assume that it is needed. As such, make sure we have at
1401 // least enough stack space for the caller to store the 8 GPRs.
1402 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1403 NumBytes = std::max(NumBytes, LinkageSize + 64);
1404
1405 // Issue CALLSEQ_START.
1406 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1407 TII.get(TII.getCallFrameSetupOpcode()))
1408 .addImm(NumBytes).addImm(0);
1409
1410 // Prepare to assign register arguments. Every argument uses up a
1411 // GPR protocol register even if it's passed in a floating-point
1412 // register (unless we're using the fast calling convention).
1413 unsigned NextGPR = PPC::X3;
1414 unsigned NextFPR = PPC::F1;
1415
1416 // Process arguments.
1417 for (const CCValAssign &VA : ArgLocs) {
1418 Register Arg = ArgRegs[VA.getValNo()];
1419 MVT ArgVT = ArgVTs[VA.getValNo()];
1420
1421 // Handle argument promotion and bitcasts.
1422 switch (VA.getLocInfo()) {
1423 default:
1424 llvm_unreachable("Unknown loc info!");
1425 case CCValAssign::Full:
1426 break;
1427 case CCValAssign::SExt: {
1428 MVT DestVT = VA.getLocVT();
1429 const TargetRegisterClass *RC =
1430 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1431 Register TmpReg = createResultReg(RC);
1432 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1433 llvm_unreachable("Failed to emit a sext!");
1434 ArgVT = DestVT;
1435 Arg = TmpReg;
1436 break;
1437 }
1438 case CCValAssign::AExt:
1439 case CCValAssign::ZExt: {
1440 MVT DestVT = VA.getLocVT();
1441 const TargetRegisterClass *RC =
1442 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1443 Register TmpReg = createResultReg(RC);
1444 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1445 llvm_unreachable("Failed to emit a zext!");
1446 ArgVT = DestVT;
1447 Arg = TmpReg;
1448 break;
1449 }
1450 case CCValAssign::BCvt: {
1451 // FIXME: Not yet handled.
1452 llvm_unreachable("Should have bailed before getting here!");
1453 break;
1454 }
1455 }
1456
1457 // Copy this argument to the appropriate register.
1458 unsigned ArgReg;
1459 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1460 ArgReg = NextFPR++;
1461 if (CC != CallingConv::Fast)
1462 ++NextGPR;
1463 } else
1464 ArgReg = NextGPR++;
1465
1466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1467 TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1468 RegArgs.push_back(ArgReg);
1469 }
1470
1471 return true;
1472 }
1473
1474 // For a call that we've determined we can fast-select, finish the
1475 // call sequence and generate a copy to obtain the return value (if any).
finishCall(MVT RetVT,CallLoweringInfo & CLI,unsigned & NumBytes)1476 bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1477 CallingConv::ID CC = CLI.CallConv;
1478
1479 // Issue CallSEQ_END.
1480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1481 TII.get(TII.getCallFrameDestroyOpcode()))
1482 .addImm(NumBytes).addImm(0);
1483
1484 // Next, generate a copy to obtain the return value.
1485 // FIXME: No multi-register return values yet, though I don't foresee
1486 // any real difficulties there.
1487 if (RetVT != MVT::isVoid) {
1488 SmallVector<CCValAssign, 16> RVLocs;
1489 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1490 CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1491 CCValAssign &VA = RVLocs[0];
1492 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1493 assert(VA.isRegLoc() && "Can only return in registers!");
1494
1495 MVT DestVT = VA.getValVT();
1496 MVT CopyVT = DestVT;
1497
1498 // Ints smaller than a register still arrive in a full 64-bit
1499 // register, so make sure we recognize this.
1500 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1501 CopyVT = MVT::i64;
1502
1503 Register SourcePhysReg = VA.getLocReg();
1504 Register ResultReg;
1505
1506 if (RetVT == CopyVT) {
1507 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1508 ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
1509
1510 // If necessary, round the floating result to single precision.
1511 } else if (CopyVT == MVT::f64) {
1512 ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1513 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
1514 ResultReg).addReg(SourcePhysReg);
1515
1516 // If only the low half of a general register is needed, generate
1517 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1518 // used along the fast-isel path (not lowered), and downstream logic
1519 // also doesn't like a direct subreg copy on a physical reg.)
1520 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1521 // Convert physical register from G8RC to GPRC.
1522 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1523 ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
1524 }
1525
1526 assert(ResultReg && "ResultReg unset!");
1527 CLI.InRegs.push_back(SourcePhysReg);
1528 CLI.ResultReg = ResultReg;
1529 CLI.NumResultRegs = 1;
1530 }
1531
1532 return true;
1533 }
1534
fastLowerCall(CallLoweringInfo & CLI)1535 bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1536 CallingConv::ID CC = CLI.CallConv;
1537 bool IsTailCall = CLI.IsTailCall;
1538 bool IsVarArg = CLI.IsVarArg;
1539 const Value *Callee = CLI.Callee;
1540 const MCSymbol *Symbol = CLI.Symbol;
1541
1542 if (!Callee && !Symbol)
1543 return false;
1544
1545 // Allow SelectionDAG isel to handle tail calls and long calls.
1546 if (IsTailCall || Subtarget->useLongCalls())
1547 return false;
1548
1549 // Let SDISel handle vararg functions.
1550 if (IsVarArg)
1551 return false;
1552
1553 // If this is a PC-Rel function, let SDISel handle the call.
1554 if (Subtarget->isUsingPCRelativeCalls())
1555 return false;
1556
1557 // Handle simple calls for now, with legal return types and
1558 // those that can be extended.
1559 Type *RetTy = CLI.RetTy;
1560 MVT RetVT;
1561 if (RetTy->isVoidTy())
1562 RetVT = MVT::isVoid;
1563 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1564 RetVT != MVT::i8)
1565 return false;
1566 else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1567 // We can't handle boolean returns when CR bits are in use.
1568 return false;
1569
1570 // FIXME: No multi-register return values yet.
1571 if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1572 RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1573 RetVT != MVT::f64) {
1574 SmallVector<CCValAssign, 16> RVLocs;
1575 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1576 CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1577 if (RVLocs.size() > 1)
1578 return false;
1579 }
1580
1581 // Bail early if more than 8 arguments, as we only currently
1582 // handle arguments passed in registers.
1583 unsigned NumArgs = CLI.OutVals.size();
1584 if (NumArgs > 8)
1585 return false;
1586
1587 // Set up the argument vectors.
1588 SmallVector<Value*, 8> Args;
1589 SmallVector<Register, 8> ArgRegs;
1590 SmallVector<MVT, 8> ArgVTs;
1591 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1592
1593 Args.reserve(NumArgs);
1594 ArgRegs.reserve(NumArgs);
1595 ArgVTs.reserve(NumArgs);
1596 ArgFlags.reserve(NumArgs);
1597
1598 for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1599 // Only handle easy calls for now. It would be reasonably easy
1600 // to handle <= 8-byte structures passed ByVal in registers, but we
1601 // have to ensure they are right-justified in the register.
1602 ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1603 if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1604 return false;
1605
1606 Value *ArgValue = CLI.OutVals[i];
1607 Type *ArgTy = ArgValue->getType();
1608 MVT ArgVT;
1609 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1610 return false;
1611
1612 // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1613 // types, which is passed through vector register. Skip these types and
1614 // fallback to default SelectionDAG based selection.
1615 if (ArgVT.isVector() || ArgVT == MVT::f128)
1616 return false;
1617
1618 Register Arg = getRegForValue(ArgValue);
1619 if (!Arg)
1620 return false;
1621
1622 Args.push_back(ArgValue);
1623 ArgRegs.push_back(Arg);
1624 ArgVTs.push_back(ArgVT);
1625 ArgFlags.push_back(Flags);
1626 }
1627
1628 // Process the arguments.
1629 SmallVector<unsigned, 8> RegArgs;
1630 unsigned NumBytes;
1631
1632 if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1633 RegArgs, CC, NumBytes, IsVarArg))
1634 return false;
1635
1636 MachineInstrBuilder MIB;
1637 // FIXME: No handling for function pointers yet. This requires
1638 // implementing the function descriptor (OPD) setup.
1639 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1640 if (!GV) {
1641 // patchpoints are a special case; they always dispatch to a pointer value.
1642 // However, we don't actually want to generate the indirect call sequence
1643 // here (that will be generated, as necessary, during asm printing), and
1644 // the call we generate here will be erased by FastISel::selectPatchpoint,
1645 // so don't try very hard...
1646 if (CLI.IsPatchPoint)
1647 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
1648 else
1649 return false;
1650 } else {
1651 // Build direct call with NOP for TOC restore.
1652 // FIXME: We can and should optimize away the NOP for local calls.
1653 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1654 TII.get(PPC::BL8_NOP));
1655 // Add callee.
1656 MIB.addGlobalAddress(GV);
1657 }
1658
1659 // Add implicit physical register uses to the call.
1660 for (unsigned Reg : RegArgs)
1661 MIB.addReg(Reg, RegState::Implicit);
1662
1663 // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1664 // into the call.
1665 PPCFuncInfo->setUsesTOCBasePtr();
1666 MIB.addReg(PPC::X2, RegState::Implicit);
1667
1668 // Add a register mask with the call-preserved registers. Proper
1669 // defs for return values will be added by setPhysRegsDeadExcept().
1670 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1671
1672 CLI.Call = MIB;
1673
1674 // Finish off the call including any return values.
1675 return finishCall(RetVT, CLI, NumBytes);
1676 }
1677
1678 // Attempt to fast-select a return instruction.
SelectRet(const Instruction * I)1679 bool PPCFastISel::SelectRet(const Instruction *I) {
1680
1681 if (!FuncInfo.CanLowerReturn)
1682 return false;
1683
1684 const ReturnInst *Ret = cast<ReturnInst>(I);
1685 const Function &F = *I->getParent()->getParent();
1686
1687 // Build a list of return value registers.
1688 SmallVector<Register, 4> RetRegs;
1689 CallingConv::ID CC = F.getCallingConv();
1690
1691 if (Ret->getNumOperands() > 0) {
1692 SmallVector<ISD::OutputArg, 4> Outs;
1693 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1694
1695 // Analyze operands of the call, assigning locations to each operand.
1696 SmallVector<CCValAssign, 16> ValLocs;
1697 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1698 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1699 const Value *RV = Ret->getOperand(0);
1700
1701 // FIXME: Only one output register for now.
1702 if (ValLocs.size() > 1)
1703 return false;
1704
1705 // Special case for returning a constant integer of any size - materialize
1706 // the constant as an i64 and copy it to the return register.
1707 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
1708 CCValAssign &VA = ValLocs[0];
1709
1710 Register RetReg = VA.getLocReg();
1711 // We still need to worry about properly extending the sign. For example,
1712 // we could have only a single bit or a constant that needs zero
1713 // extension rather than sign extension. Make sure we pass the return
1714 // value extension property to integer materialization.
1715 Register SrcReg =
1716 PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1717
1718 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1719 TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1720
1721 RetRegs.push_back(RetReg);
1722
1723 } else {
1724 Register Reg = getRegForValue(RV);
1725
1726 if (!Reg)
1727 return false;
1728
1729 // Copy the result values into the output registers.
1730 for (unsigned i = 0; i < ValLocs.size(); ++i) {
1731
1732 CCValAssign &VA = ValLocs[i];
1733 assert(VA.isRegLoc() && "Can only return in registers!");
1734 RetRegs.push_back(VA.getLocReg());
1735 Register SrcReg = Reg + VA.getValNo();
1736
1737 EVT RVEVT = TLI.getValueType(DL, RV->getType());
1738 if (!RVEVT.isSimple())
1739 return false;
1740 MVT RVVT = RVEVT.getSimpleVT();
1741 MVT DestVT = VA.getLocVT();
1742
1743 if (RVVT != DestVT && RVVT != MVT::i8 &&
1744 RVVT != MVT::i16 && RVVT != MVT::i32)
1745 return false;
1746
1747 if (RVVT != DestVT) {
1748 switch (VA.getLocInfo()) {
1749 default:
1750 llvm_unreachable("Unknown loc info!");
1751 case CCValAssign::Full:
1752 llvm_unreachable("Full value assign but types don't match?");
1753 case CCValAssign::AExt:
1754 case CCValAssign::ZExt: {
1755 const TargetRegisterClass *RC =
1756 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1757 Register TmpReg = createResultReg(RC);
1758 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1759 return false;
1760 SrcReg = TmpReg;
1761 break;
1762 }
1763 case CCValAssign::SExt: {
1764 const TargetRegisterClass *RC =
1765 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1766 Register TmpReg = createResultReg(RC);
1767 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1768 return false;
1769 SrcReg = TmpReg;
1770 break;
1771 }
1772 }
1773 }
1774
1775 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1776 TII.get(TargetOpcode::COPY), RetRegs[i])
1777 .addReg(SrcReg);
1778 }
1779 }
1780 }
1781
1782 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1783 TII.get(PPC::BLR8));
1784
1785 for (Register Reg : RetRegs)
1786 MIB.addReg(Reg, RegState::Implicit);
1787
1788 return true;
1789 }
1790
1791 // Attempt to emit an integer extend of SrcReg into DestReg. Both
1792 // signed and zero extensions are supported. Return false if we
1793 // can't handle it.
PPCEmitIntExt(MVT SrcVT,Register SrcReg,MVT DestVT,Register DestReg,bool IsZExt)1794 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
1795 Register DestReg, bool IsZExt) {
1796 if (DestVT != MVT::i32 && DestVT != MVT::i64)
1797 return false;
1798 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1799 return false;
1800
1801 // Signed extensions use EXTSB, EXTSH, EXTSW.
1802 if (!IsZExt) {
1803 unsigned Opc;
1804 if (SrcVT == MVT::i8)
1805 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1806 else if (SrcVT == MVT::i16)
1807 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1808 else {
1809 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1810 Opc = PPC::EXTSW_32_64;
1811 }
1812 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1813 .addReg(SrcReg);
1814
1815 // Unsigned 32-bit extensions use RLWINM.
1816 } else if (DestVT == MVT::i32) {
1817 unsigned MB;
1818 if (SrcVT == MVT::i8)
1819 MB = 24;
1820 else {
1821 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1822 MB = 16;
1823 }
1824 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1825 DestReg)
1826 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1827
1828 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1829 } else {
1830 unsigned MB;
1831 if (SrcVT == MVT::i8)
1832 MB = 56;
1833 else if (SrcVT == MVT::i16)
1834 MB = 48;
1835 else
1836 MB = 32;
1837 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1838 TII.get(PPC::RLDICL_32_64), DestReg)
1839 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1840 }
1841
1842 return true;
1843 }
1844
1845 // Attempt to fast-select an indirect branch instruction.
SelectIndirectBr(const Instruction * I)1846 bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1847 Register AddrReg = getRegForValue(I->getOperand(0));
1848 if (!AddrReg)
1849 return false;
1850
1851 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1852 .addReg(AddrReg);
1853 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1854
1855 const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1856 for (const BasicBlock *SuccBB : IB->successors())
1857 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));
1858
1859 return true;
1860 }
1861
1862 // Attempt to fast-select an integer truncate instruction.
SelectTrunc(const Instruction * I)1863 bool PPCFastISel::SelectTrunc(const Instruction *I) {
1864 Value *Src = I->getOperand(0);
1865 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1866 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1867
1868 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1869 return false;
1870
1871 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1872 return false;
1873
1874 Register SrcReg = getRegForValue(Src);
1875 if (!SrcReg)
1876 return false;
1877
1878 // The only interesting case is when we need to switch register classes.
1879 if (SrcVT == MVT::i64)
1880 SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
1881
1882 updateValueMap(I, SrcReg);
1883 return true;
1884 }
1885
1886 // Attempt to fast-select an integer extend instruction.
SelectIntExt(const Instruction * I)1887 bool PPCFastISel::SelectIntExt(const Instruction *I) {
1888 Type *DestTy = I->getType();
1889 Value *Src = I->getOperand(0);
1890 Type *SrcTy = Src->getType();
1891
1892 bool IsZExt = isa<ZExtInst>(I);
1893 Register SrcReg = getRegForValue(Src);
1894 if (!SrcReg) return false;
1895
1896 EVT SrcEVT, DestEVT;
1897 SrcEVT = TLI.getValueType(DL, SrcTy, true);
1898 DestEVT = TLI.getValueType(DL, DestTy, true);
1899 if (!SrcEVT.isSimple())
1900 return false;
1901 if (!DestEVT.isSimple())
1902 return false;
1903
1904 MVT SrcVT = SrcEVT.getSimpleVT();
1905 MVT DestVT = DestEVT.getSimpleVT();
1906
1907 // If we know the register class needed for the result of this
1908 // instruction, use it. Otherwise pick the register class of the
1909 // correct size that does not contain X0/R0, since we don't know
1910 // whether downstream uses permit that assignment.
1911 Register AssignedReg = FuncInfo.ValueMap[I];
1912 const TargetRegisterClass *RC =
1913 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1914 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1915 &PPC::GPRC_and_GPRC_NOR0RegClass));
1916 Register ResultReg = createResultReg(RC);
1917
1918 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1919 return false;
1920
1921 updateValueMap(I, ResultReg);
1922 return true;
1923 }
1924
1925 // Attempt to fast-select an instruction that wasn't handled by
1926 // the table-generated machinery.
fastSelectInstruction(const Instruction * I)1927 bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1928
1929 switch (I->getOpcode()) {
1930 case Instruction::Load:
1931 return SelectLoad(I);
1932 case Instruction::Store:
1933 return SelectStore(I);
1934 case Instruction::Br:
1935 return SelectBranch(I);
1936 case Instruction::IndirectBr:
1937 return SelectIndirectBr(I);
1938 case Instruction::FPExt:
1939 return SelectFPExt(I);
1940 case Instruction::FPTrunc:
1941 return SelectFPTrunc(I);
1942 case Instruction::SIToFP:
1943 return SelectIToFP(I, /*IsSigned*/ true);
1944 case Instruction::UIToFP:
1945 return SelectIToFP(I, /*IsSigned*/ false);
1946 case Instruction::FPToSI:
1947 return SelectFPToI(I, /*IsSigned*/ true);
1948 case Instruction::FPToUI:
1949 return SelectFPToI(I, /*IsSigned*/ false);
1950 case Instruction::Add:
1951 return SelectBinaryIntOp(I, ISD::ADD);
1952 case Instruction::Or:
1953 return SelectBinaryIntOp(I, ISD::OR);
1954 case Instruction::Sub:
1955 return SelectBinaryIntOp(I, ISD::SUB);
1956 case Instruction::Ret:
1957 return SelectRet(I);
1958 case Instruction::Trunc:
1959 return SelectTrunc(I);
1960 case Instruction::ZExt:
1961 case Instruction::SExt:
1962 return SelectIntExt(I);
1963 // Here add other flavors of Instruction::XXX that automated
1964 // cases don't catch. For example, switches are terminators
1965 // that aren't yet handled.
1966 default:
1967 break;
1968 }
1969 return false;
1970 }
1971
1972 // Materialize a floating-point constant into a register, and return
1973 // the register number (or zero if we failed to handle it).
PPCMaterializeFP(const ConstantFP * CFP,MVT VT)1974 Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1975 // If this is a PC-Rel function, let SDISel handle constant pool.
1976 if (Subtarget->isUsingPCRelativeCalls())
1977 return Register();
1978
1979 // No plans to handle long double here.
1980 if (VT != MVT::f32 && VT != MVT::f64)
1981 return Register();
1982
1983 // All FP constants are loaded from the constant pool.
1984 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
1985 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
1986 const bool HasSPE = Subtarget->hasSPE();
1987 const TargetRegisterClass *RC;
1988 if (HasSPE)
1989 RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
1990 else
1991 RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
1992
1993 Register DestReg = createResultReg(RC);
1994 CodeModel::Model CModel = TM.getCodeModel();
1995
1996 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1997 MachinePointerInfo::getConstantPool(*FuncInfo.MF),
1998 MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
1999
2000 unsigned Opc;
2001
2002 if (HasSPE)
2003 Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2004 else
2005 Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2006
2007 Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2008
2009 PPCFuncInfo->setUsesTOCBasePtr();
2010 // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2011 if (CModel == CodeModel::Small) {
2012 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
2013 TmpReg)
2014 .addConstantPoolIndex(Idx).addReg(PPC::X2);
2015 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2016 .addImm(0).addReg(TmpReg).addMemOperand(MMO);
2017 } else {
2018 // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2020 TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
2021 // But for large code model, we must generate a LDtocL followed
2022 // by the LF[SD].
2023 if (CModel == CodeModel::Large) {
2024 Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2025 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2026 TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
2027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2028 .addImm(0)
2029 .addReg(TmpReg2);
2030 } else
2031 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2032 .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
2033 .addReg(TmpReg)
2034 .addMemOperand(MMO);
2035 }
2036
2037 return DestReg;
2038 }
2039
2040 // Materialize the address of a global value into a register, and return
2041 // the register number (or zero if we failed to handle it).
PPCMaterializeGV(const GlobalValue * GV,MVT VT)2042 Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
2043 // If this is a PC-Rel function, let SDISel handle GV materialization.
2044 if (Subtarget->isUsingPCRelativeCalls())
2045 return Register();
2046
2047 assert(VT == MVT::i64 && "Non-address!");
2048 const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
2049 Register DestReg = createResultReg(RC);
2050
2051 // Global values may be plain old object addresses, TLS object
2052 // addresses, constant pool entries, or jump tables. How we generate
2053 // code for these may depend on small, medium, or large code model.
2054 CodeModel::Model CModel = TM.getCodeModel();
2055
2056 // FIXME: Jump tables are not yet required because fast-isel doesn't
2057 // handle switches; if that changes, we need them as well. For now,
2058 // what follows assumes everything's a generic (or TLS) global address.
2059
2060 // FIXME: We don't yet handle the complexity of TLS.
2061 if (GV->isThreadLocal())
2062 return Register();
2063
2064 PPCFuncInfo->setUsesTOCBasePtr();
2065 bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
2066 isa<GlobalVariable>(GV) &&
2067 cast<GlobalVariable>(GV)->hasAttribute("toc-data");
2068
2069 // For small code model, generate a simple TOC load.
2070 if (CModel == CodeModel::Small) {
2071 auto MIB = BuildMI(
2072 *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2073 IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
2074 if (IsAIXTocData)
2075 MIB.addReg(PPC::X2).addGlobalAddress(GV);
2076 else
2077 MIB.addGlobalAddress(GV).addReg(PPC::X2);
2078 } else {
2079 // If the address is an externally defined symbol, a symbol with common
2080 // or externally available linkage, a non-local function address, or a
2081 // jump table address (not yet needed), or if we are generating code
2082 // for large code model, we generate:
2083 // LDtocL(GV, ADDIStocHA8(%x2, GV))
2084 // Otherwise we generate:
2085 // ADDItocL8(ADDIStocHA8(%x2, GV), GV)
2086 // Either way, start with the ADDIStocHA8:
2087 Register HighPartReg = createResultReg(RC);
2088 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2089 HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
2090
2091 if (Subtarget->isGVIndirectSymbol(GV)) {
2092 assert(!IsAIXTocData && "TOC data should always be direct.");
2093 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2094 DestReg).addGlobalAddress(GV).addReg(HighPartReg);
2095 } else {
2096 // Otherwise generate the ADDItocL8.
2097 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
2098 DestReg)
2099 .addReg(HighPartReg)
2100 .addGlobalAddress(GV);
2101 }
2102 }
2103
2104 return DestReg;
2105 }
2106
2107 // Materialize a 32-bit integer constant into a register, and return
2108 // the register number (or zero if we failed to handle it).
PPCMaterialize32BitInt(int64_t Imm,const TargetRegisterClass * RC)2109 Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2110 const TargetRegisterClass *RC) {
2111 unsigned Lo = Imm & 0xFFFF;
2112 unsigned Hi = (Imm >> 16) & 0xFFFF;
2113
2114 Register ResultReg = createResultReg(RC);
2115 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2116
2117 if (isInt<16>(Imm))
2118 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2119 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2120 .addImm(Imm);
2121 else if (Lo) {
2122 // Both Lo and Hi have nonzero bits.
2123 Register TmpReg = createResultReg(RC);
2124 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2125 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2126 .addImm(Hi);
2127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2128 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2129 .addReg(TmpReg).addImm(Lo);
2130 } else
2131 // Just Hi bits.
2132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2133 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2134 .addImm(Hi);
2135
2136 return ResultReg;
2137 }
2138
2139 // Materialize a 64-bit integer constant into a register, and return
2140 // the register number (or zero if we failed to handle it).
PPCMaterialize64BitInt(int64_t Imm,const TargetRegisterClass * RC)2141 Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2142 const TargetRegisterClass *RC) {
2143 unsigned Remainder = 0;
2144 unsigned Shift = 0;
2145
2146 // If the value doesn't fit in 32 bits, see if we can shift it
2147 // so that it fits in 32 bits.
2148 if (!isInt<32>(Imm)) {
2149 Shift = llvm::countr_zero<uint64_t>(Imm);
2150 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2151
2152 if (isInt<32>(ImmSh))
2153 Imm = ImmSh;
2154 else {
2155 Remainder = Imm;
2156 Shift = 32;
2157 Imm >>= 32;
2158 }
2159 }
2160
2161 // Handle the high-order 32 bits (if shifted) or the whole 32 bits
2162 // (if not shifted).
2163 Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2164 if (!Shift)
2165 return TmpReg1;
2166
2167 // If upper 32 bits were not zero, we've built them and need to shift
2168 // them into place.
2169 Register TmpReg2;
2170 if (Imm) {
2171 TmpReg2 = createResultReg(RC);
2172 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
2173 TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
2174 } else
2175 TmpReg2 = TmpReg1;
2176
2177 Register TmpReg3;
2178 unsigned Hi, Lo;
2179 if ((Hi = (Remainder >> 16) & 0xFFFF)) {
2180 TmpReg3 = createResultReg(RC);
2181 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
2182 TmpReg3).addReg(TmpReg2).addImm(Hi);
2183 } else
2184 TmpReg3 = TmpReg2;
2185
2186 if ((Lo = Remainder & 0xFFFF)) {
2187 Register ResultReg = createResultReg(RC);
2188 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
2189 ResultReg).addReg(TmpReg3).addImm(Lo);
2190 return ResultReg;
2191 }
2192
2193 return TmpReg3;
2194 }
2195
2196 // Materialize an integer constant into a register, and return
2197 // the register number (or zero if we failed to handle it).
PPCMaterializeInt(const ConstantInt * CI,MVT VT,bool UseSExt)2198 Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2199 bool UseSExt) {
2200 // If we're using CR bit registers for i1 values, handle that as a special
2201 // case first.
2202 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2203 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2204 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2205 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2206 return ImmReg;
2207 }
2208
2209 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2210 VT != MVT::i1)
2211 return Register();
2212
2213 const TargetRegisterClass *RC =
2214 ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2215 int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2216
2217 // If the constant is in range, use a load-immediate.
2218 // Since LI will sign extend the constant we need to make sure that for
2219 // our zeroext constants that the sign extended constant fits into 16-bits -
2220 // a range of 0..0x7fff.
2221 if (isInt<16>(Imm)) {
2222 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2223 Register ImmReg = createResultReg(RC);
2224 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
2225 .addImm(Imm);
2226 return ImmReg;
2227 }
2228
2229 // Construct the constant piecewise.
2230 if (VT == MVT::i64)
2231 return PPCMaterialize64BitInt(Imm, RC);
2232 else if (VT == MVT::i32)
2233 return PPCMaterialize32BitInt(Imm, RC);
2234
2235 return Register();
2236 }
2237
2238 // Materialize a constant into a register, and return the register
2239 // number (or zero if we failed to handle it).
fastMaterializeConstant(const Constant * C)2240 Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
2241 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2242
2243 // Only handle simple types.
2244 if (!CEVT.isSimple())
2245 return Register();
2246 MVT VT = CEVT.getSimpleVT();
2247
2248 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2249 return PPCMaterializeFP(CFP, VT);
2250 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2251 return PPCMaterializeGV(GV, VT);
2252 else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2253 // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2254 // assumes that constant PHI operands will be zero extended, and failure to
2255 // match that assumption will cause problems if we sign extend here but
2256 // some user of a PHI is in a block for which we fall back to full SDAG
2257 // instruction selection.
2258 return PPCMaterializeInt(CI, VT, false);
2259
2260 return Register();
2261 }
2262
2263 // Materialize the address created by an alloca into a register, and
2264 // return the register number (or zero if we failed to handle it).
fastMaterializeAlloca(const AllocaInst * AI)2265 Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2266 DenseMap<const AllocaInst *, int>::iterator SI =
2267 FuncInfo.StaticAllocaMap.find(AI);
2268
2269 // Don't handle dynamic allocas.
2270 if (SI == FuncInfo.StaticAllocaMap.end())
2271 return Register();
2272
2273 MVT VT;
2274 if (!isLoadTypeLegal(AI->getType(), VT))
2275 return Register();
2276
2277 if (SI != FuncInfo.StaticAllocaMap.end()) {
2278 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2279 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2280 ResultReg).addFrameIndex(SI->second).addImm(0);
2281 return ResultReg;
2282 }
2283
2284 return Register();
2285 }
2286
2287 // Fold loads into extends when possible.
2288 // FIXME: We can have multiple redundant extend/trunc instructions
2289 // following a load. The folding only picks up one. Extend this
2290 // to check subsequent instructions for the same pattern and remove
2291 // them. Thus ResultReg should be the def reg for the last redundant
2292 // instruction in a chain, and all intervening instructions can be
2293 // removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2294 // to add ELF64-NOT: rldicl to the appropriate tests when this works.
tryToFoldLoadIntoMI(MachineInstr * MI,unsigned OpNo,const LoadInst * LI)2295 bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2296 const LoadInst *LI) {
2297 // Verify we have a legal type before going any further.
2298 MVT VT;
2299 if (!isLoadTypeLegal(LI->getType(), VT))
2300 return false;
2301
2302 // Combine load followed by zero- or sign-extend.
2303 bool IsZExt = false;
2304 switch(MI->getOpcode()) {
2305 default:
2306 return false;
2307
2308 case PPC::RLDICL:
2309 case PPC::RLDICL_32_64: {
2310 IsZExt = true;
2311 unsigned MB = MI->getOperand(3).getImm();
2312 if ((VT == MVT::i8 && MB <= 56) ||
2313 (VT == MVT::i16 && MB <= 48) ||
2314 (VT == MVT::i32 && MB <= 32))
2315 break;
2316 return false;
2317 }
2318
2319 case PPC::RLWINM:
2320 case PPC::RLWINM8: {
2321 IsZExt = true;
2322 unsigned MB = MI->getOperand(3).getImm();
2323 if ((VT == MVT::i8 && MB <= 24) ||
2324 (VT == MVT::i16 && MB <= 16))
2325 break;
2326 return false;
2327 }
2328
2329 case PPC::EXTSB:
2330 case PPC::EXTSB8:
2331 case PPC::EXTSB8_32_64:
2332 /* There is no sign-extending load-byte instruction. */
2333 return false;
2334
2335 case PPC::EXTSH:
2336 case PPC::EXTSH8:
2337 case PPC::EXTSH8_32_64: {
2338 if (VT != MVT::i16 && VT != MVT::i8)
2339 return false;
2340 break;
2341 }
2342
2343 case PPC::EXTSW:
2344 case PPC::EXTSW_32:
2345 case PPC::EXTSW_32_64: {
2346 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2347 return false;
2348 break;
2349 }
2350 }
2351
2352 // See if we can handle this address.
2353 Address Addr;
2354 if (!PPCComputeAddress(LI->getOperand(0), Addr))
2355 return false;
2356
2357 Register ResultReg = MI->getOperand(0).getReg();
2358
2359 if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
2360 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2361 return false;
2362
2363 MachineBasicBlock::iterator I(MI);
2364 removeDeadCode(I, std::next(I));
2365 return true;
2366 }
2367
2368 // Attempt to lower call arguments in a faster way than done by
2369 // the selection DAG code.
fastLowerArguments()2370 bool PPCFastISel::fastLowerArguments() {
2371 // Defer to normal argument lowering for now. It's reasonably
2372 // efficient. Consider doing something like ARM to handle the
2373 // case where all args fit in registers, no varargs, no float
2374 // or vector args.
2375 return false;
2376 }
2377
2378 // Handle materializing integer constants into a register. This is not
2379 // automatically generated for PowerPC, so must be explicitly created here.
fastEmit_i(MVT Ty,MVT VT,unsigned Opc,uint64_t Imm)2380 Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2381
2382 if (Opc != ISD::Constant)
2383 return Register();
2384
2385 // If we're using CR bit registers for i1 values, handle that as a special
2386 // case first.
2387 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2388 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2389 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2390 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2391 return ImmReg;
2392 }
2393
2394 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2395 VT != MVT::i1)
2396 return Register();
2397
2398 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2399 &PPC::GPRCRegClass);
2400 if (VT == MVT::i64)
2401 return PPCMaterialize64BitInt(Imm, RC);
2402 else
2403 return PPCMaterialize32BitInt(Imm, RC);
2404 }
2405
2406 // Override for ADDI and ADDI8 to set the correct register class
2407 // on RHS operand 0. The automatic infrastructure naively assumes
2408 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2409 // for these cases. At the moment, none of the other automatically
2410 // generated RI instructions require special treatment. However, once
2411 // SelectSelect is implemented, "isel" requires similar handling.
2412 //
2413 // Also be conservative about the output register class. Avoid
2414 // assigning R0 or X0 to the output register for GPRC and G8RC
2415 // register classes, as any such result could be used in ADDI, etc.,
2416 // where those regs have another meaning.
fastEmitInst_ri(unsigned MachineInstOpcode,const TargetRegisterClass * RC,Register Op0,uint64_t Imm)2417 Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2418 const TargetRegisterClass *RC,
2419 Register Op0, uint64_t Imm) {
2420 if (MachineInstOpcode == PPC::ADDI)
2421 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2422 else if (MachineInstOpcode == PPC::ADDI8)
2423 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2424
2425 const TargetRegisterClass *UseRC =
2426 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2427 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2428
2429 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
2430 }
2431
2432 // Override for instructions with one register operand to avoid use of
2433 // R0/X0. The automatic infrastructure isn't aware of the context so
2434 // we must be conservative.
fastEmitInst_r(unsigned MachineInstOpcode,const TargetRegisterClass * RC,Register Op0)2435 Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2436 const TargetRegisterClass *RC,
2437 Register Op0) {
2438 const TargetRegisterClass *UseRC =
2439 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2440 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2441
2442 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
2443 }
2444
2445 // Override for instructions with two register operands to avoid use
2446 // of R0/X0. The automatic infrastructure isn't aware of the context
2447 // so we must be conservative.
fastEmitInst_rr(unsigned MachineInstOpcode,const TargetRegisterClass * RC,Register Op0,Register Op1)2448 Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2449 const TargetRegisterClass *RC,
2450 Register Op0, Register Op1) {
2451 const TargetRegisterClass *UseRC =
2452 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2453 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2454
2455 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
2456 }
2457
2458 namespace llvm {
2459 // Create the fast instruction selector for PowerPC64 ELF.
createFastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)2460 FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2461 const TargetLibraryInfo *LibInfo) {
2462 // Only available on 64-bit for now.
2463 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2464 if (Subtarget.isPPC64())
2465 return new PPCFastISel(FuncInfo, LibInfo);
2466 return nullptr;
2467 }
2468 }
2469