1 //=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the table-generated and custom routines for the AArch64 10 // Calling Convention. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64CallingConvention.h" 15 #include "AArch64.h" 16 #include "AArch64InstrInfo.h" 17 #include "AArch64Subtarget.h" 18 #include "llvm/CodeGen/CallingConvLower.h" 19 #include "llvm/CodeGen/TargetInstrInfo.h" 20 #include "llvm/IR/CallingConv.h" 21 using namespace llvm; 22 23 static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, 24 AArch64::X3, AArch64::X4, AArch64::X5, 25 AArch64::X6, AArch64::X7}; 26 static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, 27 AArch64::H3, AArch64::H4, AArch64::H5, 28 AArch64::H6, AArch64::H7}; 29 static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, 30 AArch64::S3, AArch64::S4, AArch64::S5, 31 AArch64::S6, AArch64::S7}; 32 static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, 33 AArch64::D3, AArch64::D4, AArch64::D5, 34 AArch64::D6, AArch64::D7}; 35 static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, 36 AArch64::Q3, AArch64::Q4, AArch64::Q5, 37 AArch64::Q6, AArch64::Q7}; 38 static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2, 39 AArch64::Z3, AArch64::Z4, AArch64::Z5, 40 AArch64::Z6, AArch64::Z7}; 41 42 static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers, 43 MVT LocVT, ISD::ArgFlagsTy &ArgFlags, 44 CCState &State, Align SlotAlign) { 45 if (LocVT.isScalableVector()) { 46 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( 47 State.getMachineFunction().getSubtarget()); 48 const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); 49 50 // We are about to reinvoke the CCAssignFn auto-generated handler. If we 51 // don't unset these flags we will get stuck in an infinite loop forever 52 // invoking the custom handler. 53 ArgFlags.setInConsecutiveRegs(false); 54 ArgFlags.setInConsecutiveRegsLast(false); 55 56 // The calling convention for passing SVE tuples states that in the event 57 // we cannot allocate enough registers for the tuple we should still leave 58 // any remaining registers unallocated. However, when we call the 59 // CCAssignFn again we want it to behave as if all remaining registers are 60 // allocated. This will force the code to pass the tuple indirectly in 61 // accordance with the PCS. 62 bool RegsAllocated[8]; 63 for (int I = 0; I < 8; I++) { 64 RegsAllocated[I] = State.isAllocated(ZRegList[I]); 65 State.AllocateReg(ZRegList[I]); 66 } 67 68 auto &It = PendingMembers[0]; 69 CCAssignFn *AssignFn = 70 TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false); 71 if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full, 72 ArgFlags, State)) 73 llvm_unreachable("Call operand has unhandled type"); 74 75 // Return the flags to how they were before. 76 ArgFlags.setInConsecutiveRegs(true); 77 ArgFlags.setInConsecutiveRegsLast(true); 78 79 // Return the register state back to how it was before, leaving any 80 // unallocated registers available for other smaller types. 81 for (int I = 0; I < 8; I++) 82 if (!RegsAllocated[I]) 83 State.DeallocateReg(ZRegList[I]); 84 85 // All pending members have now been allocated 86 PendingMembers.clear(); 87 return true; 88 } 89 90 unsigned Size = LocVT.getSizeInBits() / 8; 91 const Align StackAlign = 92 State.getMachineFunction().getDataLayout().getStackAlignment(); 93 const Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); 94 const Align Alignment = std::min(OrigAlign, StackAlign); 95 96 for (auto &It : PendingMembers) { 97 It.convertToMem(State.AllocateStack(Size, std::max(Alignment, SlotAlign))); 98 State.addLoc(It); 99 SlotAlign = Align(1); 100 } 101 102 // All pending members have now been allocated 103 PendingMembers.clear(); 104 return true; 105 } 106 107 /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An 108 /// [N x Ty] type must still be contiguous in memory though. 109 static bool CC_AArch64_Custom_Stack_Block( 110 unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, 111 ISD::ArgFlagsTy &ArgFlags, CCState &State) { 112 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); 113 114 // Add the argument to the list to be allocated once we know the size of the 115 // block. 116 PendingMembers.push_back( 117 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 118 119 if (!ArgFlags.isInConsecutiveRegsLast()) 120 return true; 121 122 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8)); 123 } 124 125 /// Given an [N x Ty] block, it should be passed in a consecutive sequence of 126 /// registers. If no such sequence is available, mark the rest of the registers 127 /// of that type as used and place the argument on the stack. 128 static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 129 CCValAssign::LocInfo &LocInfo, 130 ISD::ArgFlagsTy &ArgFlags, CCState &State) { 131 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( 132 State.getMachineFunction().getSubtarget()); 133 bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO(); 134 135 // Try to allocate a contiguous block of registers, each of the correct 136 // size to hold one member. 137 ArrayRef<MCPhysReg> RegList; 138 if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32)) 139 RegList = XRegList; 140 else if (LocVT.SimpleTy == MVT::f16) 141 RegList = HRegList; 142 else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector()) 143 RegList = SRegList; 144 else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector()) 145 RegList = DRegList; 146 else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector()) 147 RegList = QRegList; 148 else if (LocVT.isScalableVector()) 149 RegList = ZRegList; 150 else { 151 // Not an array we want to split up after all. 152 return false; 153 } 154 155 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); 156 157 // Add the argument to the list to be allocated once we know the size of the 158 // block. 159 PendingMembers.push_back( 160 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 161 162 if (!ArgFlags.isInConsecutiveRegsLast()) 163 return true; 164 165 // [N x i32] arguments get packed into x-registers on Darwin's arm64_32 166 // because that's how the armv7k Clang front-end emits small structs. 167 unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1; 168 unsigned RegResult = State.AllocateRegBlock( 169 RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg); 170 if (RegResult && EltsPerReg == 1) { 171 for (auto &It : PendingMembers) { 172 It.convertToReg(RegResult); 173 State.addLoc(It); 174 ++RegResult; 175 } 176 PendingMembers.clear(); 177 return true; 178 } else if (RegResult) { 179 assert(EltsPerReg == 2 && "unexpected ABI"); 180 bool UseHigh = false; 181 CCValAssign::LocInfo Info; 182 for (auto &It : PendingMembers) { 183 Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt; 184 State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult, 185 MVT::i64, Info)); 186 UseHigh = !UseHigh; 187 if (!UseHigh) 188 ++RegResult; 189 } 190 PendingMembers.clear(); 191 return true; 192 } 193 194 if (!LocVT.isScalableVector()) { 195 // Mark all regs in the class as unavailable 196 for (auto Reg : RegList) 197 State.AllocateReg(Reg); 198 } 199 200 const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8); 201 202 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); 203 } 204 205 // TableGen provides definitions of the calling convention analysis entry 206 // points. 207 #include "AArch64GenCallingConv.inc" 208